//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief SI Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//

#include "SIInstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "GCNHazardRecognizer.h"
#include "SIDefines.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

// Must be at least 4 to be able to branch over minimum unconditional branch
// code. This is only for making it possible to write reasonably small tests
// for long branches.
static cl::opt<unsigned>
BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),
                 cl::desc("Restrict range of branch instructions (DEBUG)"));

SIInstrInfo::SIInstrInfo(const SISubtarget &ST)
  : AMDGPUInstrInfo(ST), RI(), ST(ST) {}

//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
//===----------------------------------------------------------------------===//

static unsigned getNumOperandsNoGlue(SDNode *Node) {
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
    --N;
  return N;
}

static SDValue findChainOperand(SDNode *Load) {
  SDValue LastOp = Load->getOperand(getNumOperandsNoGlue(Load) - 1);
  assert(LastOp.getValueType() == MVT::Other && "Chain missing from load node");
  return LastOp;
}

/// \brief Returns true if both nodes have the same value for the given
///        operand \p OpName, or if both nodes do not have this operand.
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) {
  unsigned Opc0 = N0->getMachineOpcode();
  unsigned Opc1 = N1->getMachineOpcode();

  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);

  if (Op0Idx == -1 && Op1Idx == -1)
    return true;

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))
    return false;

  // getNamedOperandIdx returns the index for the MachineInstr's operands,
  // which includes the result as the first operand. We are indexing into the
  // MachineSDNode's operands, so we need to skip the result operand to get
  // the real index.
  --Op0Idx;
  --Op1Idx;

  return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx);
}

bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
                                                    AliasAnalysis *AA) const {
  // TODO: The generic check fails for VALU instructions that should be
  // rematerializable due to implicit reads of exec. We really want all of the
  // generic logic for this except for this.
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
    return true;
  default:
    return false;
  }
}

bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
                                          int64_t &Offset0,
                                          int64_t &Offset1) const {
  if (!Load0->isMachineOpcode() || !Load1->isMachineOpcode())
    return false;

  unsigned Opc0 = Load0->getMachineOpcode();
  unsigned Opc1 = Load1->getMachineOpcode();

  // Make sure both are actually loads.
  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())
    return false;

  if (isDS(Opc0) && isDS(Opc1)) {

    // FIXME: Handle this case:
    if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1))
      return false;

    // Check base reg.
    if (Load0->getOperand(1) != Load1->getOperand(1))
      return false;

    // Check chain.
    if (findChainOperand(Load0) != findChainOperand(Load1))
      return false;

    // Skip read2 / write2 variants for simplicity.
    // TODO: We should report true if the used offsets are adjacent (excluding
    // st64 versions).
    if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::data1) != -1 ||
        AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::data1) != -1)
      return false;

    Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue();
    Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue();
    return true;
  }

  if (isSMRD(Opc0) && isSMRD(Opc1)) {
    assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1));

    // Check base reg.
    if (Load0->getOperand(0) != Load1->getOperand(0))
      return false;

    const ConstantSDNode *Load0Offset =
        dyn_cast<ConstantSDNode>(Load0->getOperand(1));
    const ConstantSDNode *Load1Offset =
        dyn_cast<ConstantSDNode>(Load1->getOperand(1));

    if (!Load0Offset || !Load1Offset)
      return false;

    // Check chain.
    if (findChainOperand(Load0) != findChainOperand(Load1))
      return false;

    Offset0 = Load0Offset->getZExtValue();
    Offset1 = Load1Offset->getZExtValue();
    return true;
  }

  // MUBUF and MTBUF can access the same addresses.
  if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) {

    // MUBUF and MTBUF have vaddr at different indices.
    if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) ||
        findChainOperand(Load0) != findChainOperand(Load1) ||
        !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) ||
        !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc))
      return false;

    int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);

    if (OffIdx0 == -1 || OffIdx1 == -1)
      return false;

    // getNamedOperandIdx returns the index for MachineInstrs. Since they
    // include the output in the operand list, but SDNodes don't, we need to
    // subtract one from the index.
    --OffIdx0;
    --OffIdx1;

    SDValue Off0 = Load0->getOperand(OffIdx0);
    SDValue Off1 = Load1->getOperand(OffIdx1);

    // The offset might be a FrameIndexSDNode.
    if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
      return false;

    Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue();
    Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue();
    return true;
  }

  return false;
}

static bool isStride64(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:
    return true;
  default:
    return false;
  }
}

bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
                                        int64_t &Offset,
                                        const TargetRegisterInfo *TRI) const {
  unsigned Opc = LdSt.getOpcode();

  if (isDS(LdSt)) {
    const MachineOperand *OffsetImm =
        getNamedOperand(LdSt, AMDGPU::OpName::offset);
    if (OffsetImm) {
      // Normal, single offset LDS instruction.
      const MachineOperand *AddrReg =
          getNamedOperand(LdSt, AMDGPU::OpName::addr);

      BaseReg = AddrReg->getReg();
      Offset = OffsetImm->getImm();
      return true;
    }

    // The 2 offset instructions use offset0 and offset1 instead. We can treat
    // these as a load with a single offset if the 2 offsets are consecutive.
    // We will use this for some partially aligned loads.
    const MachineOperand *Offset0Imm =
        getNamedOperand(LdSt, AMDGPU::OpName::offset0);
    const MachineOperand *Offset1Imm =
        getNamedOperand(LdSt, AMDGPU::OpName::offset1);

    uint8_t Offset0 = Offset0Imm->getImm();
    uint8_t Offset1 = Offset1Imm->getImm();

    if (Offset1 > Offset0 && Offset1 - Offset0 == 1) {
      // Each of these offsets is in element-sized units, so we need to convert
      // to bytes of the individual reads.

      unsigned EltSize;
      if (LdSt.mayLoad())
        EltSize = getOpRegClass(LdSt, 0)->getSize() / 2;
      else {
        assert(LdSt.mayStore());
        int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
        EltSize = getOpRegClass(LdSt, Data0Idx)->getSize();
      }

      if (isStride64(Opc))
        EltSize *= 64;

      const MachineOperand *AddrReg =
          getNamedOperand(LdSt, AMDGPU::OpName::addr);
      BaseReg = AddrReg->getReg();
      Offset = EltSize * Offset0;
      return true;
    }

    return false;
  }

  if (isMUBUF(LdSt) || isMTBUF(LdSt)) {
    const MachineOperand *SOffset = getNamedOperand(LdSt, AMDGPU::OpName::soffset);
    if (SOffset && SOffset->isReg())
      return false;

    const MachineOperand *AddrReg =
        getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
    if (!AddrReg)
      return false;

    const MachineOperand *OffsetImm =
        getNamedOperand(LdSt, AMDGPU::OpName::offset);
    BaseReg = AddrReg->getReg();
    Offset = OffsetImm->getImm();

    if (SOffset) // soffset can be an inline immediate.
      Offset += SOffset->getImm();

    return true;
  }

  if (isSMRD(LdSt)) {
    const MachineOperand *OffsetImm =
        getNamedOperand(LdSt, AMDGPU::OpName::offset);
    if (!OffsetImm)
      return false;

    const MachineOperand *SBaseReg =
        getNamedOperand(LdSt, AMDGPU::OpName::sbase);
    BaseReg = SBaseReg->getReg();
    Offset = OffsetImm->getImm();
    return true;
  }

  if (isFLAT(LdSt)) {
    const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
    BaseReg = AddrReg->getReg();
    Offset = 0;
    return true;
  }

  return false;
}

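// A minimal standalone sketch (not part of the original file) of the byte
// offset computed for the two-offset DS forms handled above: the offsets are
// in element-sized units, the element size is scaled by 64 for the *ST64
// variants, and the pair is only summarized as BaseReg + EltSize * Offset0
// when the two offsets are consecutive. The helper name and the values below
// are illustrative only.
static constexpr unsigned dsPairByteOffset(unsigned EltSize, bool Stride64,
                                           unsigned Offset0) {
  return (Stride64 ? EltSize * 64 : EltSize) * Offset0;
}
// e.g. a 4-byte ds_read2 element pair starting at offset0 = 4 begins at
// byte 16, while the st64 variant of the same pair begins 64x further out.
static_assert(dsPairByteOffset(4, false, 4) == 16, "plain read2/write2 pair");
static_assert(dsPairByteOffset(4, true, 4) == 1024, "st64 read2/write2 pair");
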
bool SIInstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
                                      MachineInstr &SecondLdSt,
                                      unsigned NumLoads) const {
  const MachineOperand *FirstDst = nullptr;
  const MachineOperand *SecondDst = nullptr;

  if ((isMUBUF(FirstLdSt) && isMUBUF(SecondLdSt)) ||
      (isMTBUF(FirstLdSt) && isMTBUF(SecondLdSt))) {
    FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdata);
    SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdata);
  } else if (isSMRD(FirstLdSt) && isSMRD(SecondLdSt)) {
    FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::sdst);
    SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::sdst);
  } else if (isDS(FirstLdSt) && isDS(SecondLdSt)) {
    FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdst);
    SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdst);
  }

  if (!FirstDst || !SecondDst)
    return false;

  // Try to limit clustering based on the total number of bytes loaded
  // rather than the number of instructions. This is done to help reduce
  // register pressure. The method used is somewhat inexact, though,
  // because it assumes that all loads in the cluster will load the
  // same number of bytes as FirstLdSt.

  // The unit of this value is bytes.
  // FIXME: This needs finer tuning.
  unsigned LoadClusterThreshold = 16;

  const MachineRegisterInfo &MRI =
      FirstLdSt.getParent()->getParent()->getRegInfo();
  const TargetRegisterClass *DstRC = MRI.getRegClass(FirstDst->getReg());

  return (NumLoads * DstRC->getSize()) <= LoadClusterThreshold;
}

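// Standalone sketch (not from the original file) of the clustering heuristic
// above: loads are clustered only while the estimated total number of bytes
// loaded, NumLoads times the size of the first destination register, stays
// within the 16-byte LoadClusterThreshold. The helper name and values are
// illustrative only.
static constexpr bool wouldCluster(unsigned NumLoads, unsigned FirstDstBytes) {
  return NumLoads * FirstDstBytes <= 16;
}
static_assert(wouldCluster(4, 4), "four 32-bit loads fit the threshold");
static_assert(!wouldCluster(3, 8), "three 64-bit loads exceed it");
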
void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MI,
                              const DebugLoc &DL, unsigned DestReg,
                              unsigned SrcReg, bool KillSrc) const {
  const TargetRegisterClass *RC = RI.getPhysRegClass(DestReg);

  if (RC == &AMDGPU::VGPR_32RegClass) {
    assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
           AMDGPU::SReg_32RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {
    if (SrcReg == AMDGPU::SCC) {
      BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B32), DestReg)
        .addImm(-1)
        .addImm(0);
      return;
    }

    assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RC == &AMDGPU::SReg_64RegClass) {
    if (DestReg == AMDGPU::VCC) {
      if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
        BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
          .addReg(SrcReg, getKillRegState(KillSrc));
      } else {
        // FIXME: Hack until VReg_1 removed.
        assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
        BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32))
          .addImm(0)
          .addReg(SrcReg, getKillRegState(KillSrc));
      }

      return;
    }

    assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (DestReg == AMDGPU::SCC) {
    assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::S_CMP_LG_U32))
      .addReg(SrcReg, getKillRegState(KillSrc))
      .addImm(0);
    return;
  }

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (RI.isSGPRClass(RC)) {
    if (RC->getSize() > 4) {
      Opcode = AMDGPU::S_MOV_B64;
      EltSize = 8;
    } else {
      Opcode = AMDGPU::S_MOV_B32;
      EltSize = 4;
    }
  }

  ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RC, EltSize);
  bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);

  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
    unsigned SubIdx;
    if (Forward)
      SubIdx = SubIndices[Idx];
    else
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];

    MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
      get(Opcode), RI.getSubReg(DestReg, SubIdx));

    Builder.addReg(RI.getSubReg(SrcReg, SubIdx));

    if (Idx == SubIndices.size() - 1)
      Builder.addReg(SrcReg, getKillRegState(KillSrc) | RegState::Implicit);

    if (Idx == 0)
      Builder.addReg(DestReg, RegState::Define | RegState::Implicit);

    Builder.addReg(SrcReg, RegState::Implicit);
  }
}

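// Standalone sketch (not from the original file) of how the loop above orders
// the per-subregister moves: when the destination tuple starts at a lower
// hardware register index than the source it copies low-to-high, otherwise
// high-to-low, so that an overlapping source subregister is always read
// before it is overwritten. The helper below only models the index selection;
// its name is illustrative.
static constexpr unsigned subCopyPosition(unsigned Idx, unsigned NumParts,
                                          bool Forward) {
  return Forward ? Idx : NumParts - Idx - 1;
}
static_assert(subCopyPosition(0, 4, true) == 0, "forward copy starts at sub0");
static_assert(subCopyPosition(0, 4, false) == 3, "backward copy starts at sub3");
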
int SIInstrInfo::commuteOpcode(unsigned Opcode) const {
  int NewOpc;

  // Try to map original to commuted opcode
  NewOpc = AMDGPU::getCommuteRev(Opcode);
  if (NewOpc != -1)
    // Check if the commuted (REV) opcode exists on the target.
    return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;

  // Try to map commuted to original opcode
  NewOpc = AMDGPU::getCommuteOrig(Opcode);
  if (NewOpc != -1)
    // Check if the original (non-REV) opcode exists on the target.
    return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;

  return Opcode;
}

unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {

  if (DstRC->getSize() == 4) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  } else if (DstRC->getSize() == 8 && RI.isSGPRClass(DstRC)) {
    return AMDGPU::S_MOV_B64;
  } else if (DstRC->getSize() == 8 && !RI.isSGPRClass(DstRC)) {
    return AMDGPU::V_MOV_B64_PSEUDO;
  }
  return AMDGPU::COPY;
}

static unsigned getSGPRSpillSaveOpcode(unsigned Size) {
  switch (Size) {
  case 4:
    return AMDGPU::SI_SPILL_S32_SAVE;
  case 8:
    return AMDGPU::SI_SPILL_S64_SAVE;
  case 16:
    return AMDGPU::SI_SPILL_S128_SAVE;
  case 32:
    return AMDGPU::SI_SPILL_S256_SAVE;
  case 64:
    return AMDGPU::SI_SPILL_S512_SAVE;
  default:
    llvm_unreachable("unknown register size");
  }
}

static unsigned getVGPRSpillSaveOpcode(unsigned Size) {
  switch (Size) {
  case 4:
    return AMDGPU::SI_SPILL_V32_SAVE;
  case 8:
    return AMDGPU::SI_SPILL_V64_SAVE;
  case 12:
    return AMDGPU::SI_SPILL_V96_SAVE;
  case 16:
    return AMDGPU::SI_SPILL_V128_SAVE;
  case 32:
    return AMDGPU::SI_SPILL_V256_SAVE;
  case 64:
    return AMDGPU::SI_SPILL_V512_SAVE;
  default:
    llvm_unreachable("unknown register size");
  }
}

void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
                                      unsigned SrcReg, bool isKill,
                                      int FrameIndex,
                                      const TargetRegisterClass *RC,
                                      const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  DebugLoc DL = MBB.findDebugLoc(MI);

  unsigned Size = FrameInfo.getObjectSize(FrameIndex);
  unsigned Align = FrameInfo.getObjectAlignment(FrameIndex);
  MachinePointerInfo PtrInfo
    = MachinePointerInfo::getFixedStack(*MF, FrameIndex);
  MachineMemOperand *MMO
    = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                               Size, Align);

  if (RI.isSGPRClass(RC)) {
    MFI->setHasSpilledSGPRs();

    // We are only allowed to create one new instruction when spilling
    // registers, so we need to use a pseudo instruction for spilling SGPRs.
    const MCInstrDesc &OpDesc = get(getSGPRSpillSaveOpcode(RC->getSize()));

    // The SGPR spill/restore instructions only work on numbered SGPRs, so we
    // need to make sure we are using the correct register class.
    if (TargetRegisterInfo::isVirtualRegister(SrcReg) && RC->getSize() == 4) {
      MachineRegisterInfo &MRI = MF->getRegInfo();
      MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass);
    }

    MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc)
      .addReg(SrcReg, getKillRegState(isKill)) // data
      .addFrameIndex(FrameIndex)               // addr
      .addMemOperand(MMO)
      .addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
      .addReg(MFI->getScratchWaveOffsetReg(), RegState::Implicit);
    // Add the scratch resource registers as implicit uses because we may end
    // up needing them, and need to ensure that the reserved registers are
    // correctly handled.

    if (ST.hasScalarStores()) {
      // m0 is used for offset to scalar stores if used to spill.
      Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine);
    }

    return;
  }

  if (!ST.isVGPRSpillingEnabled(*MF->getFunction())) {
    LLVMContext &Ctx = MF->getFunction()->getContext();
    Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to"
                  " spill register");
    BuildMI(MBB, MI, DL, get(AMDGPU::KILL))
      .addReg(SrcReg);

    return;
  }

  assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");

  unsigned Opcode = getVGPRSpillSaveOpcode(RC->getSize());
  MFI->setHasSpilledVGPRs();
  BuildMI(MBB, MI, DL, get(Opcode))
    .addReg(SrcReg, getKillRegState(isKill)) // data
    .addFrameIndex(FrameIndex)               // addr
    .addReg(MFI->getScratchRSrcReg())        // scratch_rsrc
    .addReg(MFI->getScratchWaveOffsetReg())  // scratch_offset
    .addImm(0)                               // offset
    .addMemOperand(MMO);
}

static unsigned getSGPRSpillRestoreOpcode(unsigned Size) {
  switch (Size) {
  case 4:
    return AMDGPU::SI_SPILL_S32_RESTORE;
  case 8:
    return AMDGPU::SI_SPILL_S64_RESTORE;
  case 16:
    return AMDGPU::SI_SPILL_S128_RESTORE;
  case 32:
    return AMDGPU::SI_SPILL_S256_RESTORE;
  case 64:
    return AMDGPU::SI_SPILL_S512_RESTORE;
  default:
    llvm_unreachable("unknown register size");
  }
}

static unsigned getVGPRSpillRestoreOpcode(unsigned Size) {
  switch (Size) {
  case 4:
    return AMDGPU::SI_SPILL_V32_RESTORE;
  case 8:
    return AMDGPU::SI_SPILL_V64_RESTORE;
  case 12:
    return AMDGPU::SI_SPILL_V96_RESTORE;
  case 16:
    return AMDGPU::SI_SPILL_V128_RESTORE;
  case 32:
    return AMDGPU::SI_SPILL_V256_RESTORE;
  case 64:
    return AMDGPU::SI_SPILL_V512_RESTORE;
  default:
    llvm_unreachable("unknown register size");
  }
}

void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MI,
                                       unsigned DestReg, int FrameIndex,
                                       const TargetRegisterClass *RC,
                                       const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  DebugLoc DL = MBB.findDebugLoc(MI);
  unsigned Align = FrameInfo.getObjectAlignment(FrameIndex);
  unsigned Size = FrameInfo.getObjectSize(FrameIndex);

  MachinePointerInfo PtrInfo
    = MachinePointerInfo::getFixedStack(*MF, FrameIndex);

  MachineMemOperand *MMO = MF->getMachineMemOperand(
    PtrInfo, MachineMemOperand::MOLoad, Size, Align);

  if (RI.isSGPRClass(RC)) {
    // FIXME: Maybe this should not include a memoperand because it will be
    // lowered to non-memory instructions.
    const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(RC->getSize()));
    if (TargetRegisterInfo::isVirtualRegister(DestReg) && RC->getSize() == 4) {
      MachineRegisterInfo &MRI = MF->getRegInfo();
      MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass);
    }

    MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc, DestReg)
      .addFrameIndex(FrameIndex) // addr
      .addMemOperand(MMO)
      .addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
      .addReg(MFI->getScratchWaveOffsetReg(), RegState::Implicit);

    if (ST.hasScalarStores()) {
      // m0 is used for offset to scalar stores if used to spill.
      Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine);
    }

    return;
  }

  if (!ST.isVGPRSpillingEnabled(*MF->getFunction())) {
    LLVMContext &Ctx = MF->getFunction()->getContext();
    Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
                  " restore register");
    BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg);

    return;
  }

  assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");

  unsigned Opcode = getVGPRSpillRestoreOpcode(RC->getSize());
  BuildMI(MBB, MI, DL, get(Opcode), DestReg)
    .addFrameIndex(FrameIndex)              // vaddr
    .addReg(MFI->getScratchRSrcReg())       // scratch_rsrc
    .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
    .addImm(0)                              // offset
    .addMemOperand(MMO);
}

/// \param Offset Offset in bytes of the FrameIndex being spilled
unsigned SIInstrInfo::calculateLDSSpillAddress(
    MachineBasicBlock &MBB, MachineInstr &MI, RegScavenger *RS, unsigned TmpReg,
    unsigned FrameOffset, unsigned Size) const {
  MachineFunction *MF = MBB.getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  DebugLoc DL = MBB.findDebugLoc(MI);
  unsigned WorkGroupSize = MFI->getMaxFlatWorkGroupSize();
  unsigned WavefrontSize = ST.getWavefrontSize();

  unsigned TIDReg = MFI->getTIDReg();
  if (!MFI->hasCalculatedTID()) {
    MachineBasicBlock &Entry = MBB.getParent()->front();
    MachineBasicBlock::iterator Insert = Entry.front();
    DebugLoc DL = Insert->getDebugLoc();

    TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass,
                                   *MF);
    if (TIDReg == AMDGPU::NoRegister)
      return TIDReg;

    if (!AMDGPU::isShader(MF->getFunction()->getCallingConv()) &&
        WorkGroupSize > WavefrontSize) {

      unsigned TIDIGXReg
        = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_X);
      unsigned TIDIGYReg
        = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_Y);
      unsigned TIDIGZReg
        = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_Z);
      unsigned InputPtrReg =
        TRI->getPreloadedValue(*MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
      for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
        if (!Entry.isLiveIn(Reg))
          Entry.addLiveIn(Reg);
      }

      RS->enterBasicBlock(Entry);
      // FIXME: Can we scavenge an SReg_64 and access the subregs?
      unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
      unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0)
        .addReg(InputPtrReg)
        .addImm(SI::KernelInputOffsets::NGROUPS_Z);
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1)
        .addReg(InputPtrReg)
        .addImm(SI::KernelInputOffsets::NGROUPS_Y);

      // NGROUPS.X * NGROUPS.Y
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1)
        .addReg(STmp1)
        .addReg(STmp0);
      // (NGROUPS.X * NGROUPS.Y) * TIDIG.X
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg)
        .addReg(STmp1)
        .addReg(TIDIGXReg);
      // NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg)
        .addReg(STmp0)
        .addReg(TIDIGYReg)
        .addReg(TIDReg);
      // (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)) + TIDIG.Z
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_ADD_I32_e32), TIDReg)
        .addReg(TIDReg)
        .addReg(TIDIGZReg);
    } else {
      // Get the wave id
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64),
              TIDReg)
        .addImm(-1)
        .addImm(0);

      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64),
              TIDReg)
        .addImm(-1)
        .addReg(TIDReg);
    }

    BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32),
            TIDReg)
      .addImm(2)
      .addReg(TIDReg);
    MFI->setTIDReg(TIDReg);
  }

  // Add FrameIndex to LDS offset
  unsigned LDSOffset = MFI->getLDSSize() + (FrameOffset * WorkGroupSize);
  BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), TmpReg)
    .addImm(LDSOffset)
    .addReg(TIDReg);

  return TmpReg;
}

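// Standalone sketch (not from the original file) of the address arithmetic
// performed above once the per-thread id (TID) has been computed and shifted
// left by 2: the frame offset is scaled by the work group size, added to the
// LDS already in use, and then added to the scaled TID. The helper name and
// the values below are illustrative only.
static constexpr unsigned ldsSpillAddress(unsigned LDSSize, unsigned FrameOffset,
                                          unsigned WorkGroupSize, unsigned TID) {
  return (LDSSize + FrameOffset * WorkGroupSize) + (TID << 2);
}
// e.g. with 512 bytes of LDS in use, a frame offset of 8, a 256-lane work
// group and TID 5, the spill slot lands at 512 + 2048 + 20.
static_assert(ldsSpillAddress(512, 8, 256, 5) == 2580, "example LDS address");
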
void SIInstrInfo::insertWaitStates(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MI,
                                   int Count) const {
  DebugLoc DL = MBB.findDebugLoc(MI);
  while (Count > 0) {
    int Arg;
    if (Count >= 8)
      Arg = 7;
    else
      Arg = Count - 1;
    Count -= 8;
    BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP))
      .addImm(Arg);
  }
}

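// Standalone sketch (not from the original file) of the loop above: each
// s_nop encodes up to 8 wait states (an immediate of N yields N + 1 waits),
// so the requested count is emitted as a run of s_nop 7 instructions plus one
// final shorter s_nop. The helper name is illustrative only.
static constexpr unsigned numNopsFor(int Count) {
  return Count <= 0 ? 0 : (static_cast<unsigned>(Count) + 7) / 8;
}
static_assert(numNopsFor(1) == 1, "one wait state -> s_nop 0");
static_assert(numNopsFor(8) == 1, "eight wait states -> a single s_nop 7");
static_assert(numNopsFor(10) == 2, "ten wait states -> s_nop 7 then s_nop 1");
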
void SIInstrInfo::insertNoop(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MI) const {
  insertWaitStates(MBB, MI, 1);
}

unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default: return 1; // FIXME: Do wait states equal cycles?

  case AMDGPU::S_NOP:
    return MI.getOperand(0).getImm() + 1;
  }
}

bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc DL = MBB.findDebugLoc(MI);
  switch (MI.getOpcode()) {
  default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
  case AMDGPU::S_MOV_B64_term: {
    // This is only a terminator to get the correct spill code placement during
    // register allocation.
    MI.setDesc(get(AMDGPU::S_MOV_B64));
    break;
  }
  case AMDGPU::S_XOR_B64_term: {
    // This is only a terminator to get the correct spill code placement during
    // register allocation.
    MI.setDesc(get(AMDGPU::S_XOR_B64));
    break;
  }
  case AMDGPU::S_ANDN2_B64_term: {
    // This is only a terminator to get the correct spill code placement during
    // register allocation.
    MI.setDesc(get(AMDGPU::S_ANDN2_B64));
    break;
  }
  case AMDGPU::V_MOV_B64_PSEUDO: {
    unsigned Dst = MI.getOperand(0).getReg();
    unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    const MachineOperand &SrcOp = MI.getOperand(1);
    // FIXME: Will this work for 64-bit floating point immediates?
    assert(!SrcOp.isFPImm());
    if (SrcOp.isImm()) {
      APInt Imm(64, SrcOp.getImm());
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
        .addImm(Imm.getLoBits(32).getZExtValue())
        .addReg(Dst, RegState::Implicit | RegState::Define);
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
        .addImm(Imm.getHiBits(32).getZExtValue())
        .addReg(Dst, RegState::Implicit | RegState::Define);
    } else {
      assert(SrcOp.isReg());
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
        .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0))
        .addReg(Dst, RegState::Implicit | RegState::Define);
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
        .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1))
        .addReg(Dst, RegState::Implicit | RegState::Define);
    }
    MI.eraseFromParent();
    break;
  }
  case AMDGPU::V_MOVRELD_B32_V1:
  case AMDGPU::V_MOVRELD_B32_V2:
  case AMDGPU::V_MOVRELD_B32_V4:
  case AMDGPU::V_MOVRELD_B32_V8:
  case AMDGPU::V_MOVRELD_B32_V16: {
    const MCInstrDesc &MovRelDesc = get(AMDGPU::V_MOVRELD_B32_e32);
    unsigned VecReg = MI.getOperand(0).getReg();
    bool IsUndef = MI.getOperand(1).isUndef();
    unsigned SubReg = AMDGPU::sub0 + MI.getOperand(3).getImm();
    assert(VecReg == MI.getOperand(1).getReg());

    MachineInstr *MovRel =
      BuildMI(MBB, MI, DL, MovRelDesc)
        .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef)
        .addOperand(MI.getOperand(2))
        .addReg(VecReg, RegState::ImplicitDefine)
        .addReg(VecReg, RegState::Implicit | (IsUndef ? RegState::Undef : 0));

    const int ImpDefIdx =
      MovRelDesc.getNumOperands() + MovRelDesc.getNumImplicitUses();
    const int ImpUseIdx = ImpDefIdx + 1;
    MovRel->tieOperands(ImpDefIdx, ImpUseIdx);

    MI.eraseFromParent();
    break;
  }
  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
    MachineFunction &MF = *MBB.getParent();
    unsigned Reg = MI.getOperand(0).getReg();
    unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
    unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1);

    // Create a bundle so these instructions won't be re-ordered by the
    // post-RA scheduler.
    MIBundleBuilder Bundler(MBB, MI);
    Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg));

    // Add 32-bit offset from this instruction to the start of the
    // constant data.
    Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo)
                     .addReg(RegLo)
                     .addOperand(MI.getOperand(1)));

    MachineInstrBuilder MIB = BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi)
                                .addReg(RegHi);
    if (MI.getOperand(2).getTargetFlags() == SIInstrInfo::MO_NONE)
      MIB.addImm(0);
    else
      MIB.addOperand(MI.getOperand(2));

    Bundler.append(MIB);
    llvm::finalizeBundle(MBB, Bundler.begin());

    MI.eraseFromParent();
    break;
  }
  }
  return true;
}

bool SIInstrInfo::swapSourceModifiers(MachineInstr &MI,
                                      MachineOperand &Src0,
                                      unsigned Src0OpName,
                                      MachineOperand &Src1,
                                      unsigned Src1OpName) const {
  MachineOperand *Src0Mods = getNamedOperand(MI, Src0OpName);
  if (!Src0Mods)
    return false;

  MachineOperand *Src1Mods = getNamedOperand(MI, Src1OpName);
  assert(Src1Mods &&
         "All commutable instructions have both src0 and src1 modifiers");

  int Src0ModsVal = Src0Mods->getImm();
  int Src1ModsVal = Src1Mods->getImm();

  Src1Mods->setImm(Src0ModsVal);
  Src0Mods->setImm(Src1ModsVal);
  return true;
}

static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI,
                                             MachineOperand &RegOp,
                                             MachineOperand &NonRegOp) {
  unsigned Reg = RegOp.getReg();
  unsigned SubReg = RegOp.getSubReg();
  bool IsKill = RegOp.isKill();
  bool IsDead = RegOp.isDead();
  bool IsUndef = RegOp.isUndef();
  bool IsDebug = RegOp.isDebug();

  if (NonRegOp.isImm())
    RegOp.ChangeToImmediate(NonRegOp.getImm());
  else if (NonRegOp.isFI())
    RegOp.ChangeToFrameIndex(NonRegOp.getIndex());
  else
    return nullptr;

  NonRegOp.ChangeToRegister(Reg, false, false, IsKill, IsDead, IsUndef, IsDebug);
  NonRegOp.setSubReg(SubReg);

  return &MI;
}

MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
                                                  unsigned Src0Idx,
                                                  unsigned Src1Idx) const {
  assert(!NewMI && "this should never be used");

  unsigned Opc = MI.getOpcode();
  int CommutedOpcode = commuteOpcode(Opc);
  if (CommutedOpcode == -1)
    return nullptr;

  assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
           static_cast<int>(Src0Idx) &&
         AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
           static_cast<int>(Src1Idx) &&
         "inconsistency with findCommutedOpIndices");

  MachineOperand &Src0 = MI.getOperand(Src0Idx);
  MachineOperand &Src1 = MI.getOperand(Src1Idx);

  MachineInstr *CommutedMI = nullptr;
  if (Src0.isReg() && Src1.isReg()) {
    if (isOperandLegal(MI, Src1Idx, &Src0)) {
      // Be sure to copy the source modifiers to the right place.
      CommutedMI
        = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, Src0Idx, Src1Idx);
    }

  } else if (Src0.isReg() && !Src1.isReg()) {
    // src0 should always be able to support any operand type, so no need to
    // check operand legality.
    CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1);
  } else if (!Src0.isReg() && Src1.isReg()) {
    if (isOperandLegal(MI, Src1Idx, &Src0))
      CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0);
  } else {
    // FIXME: Found two non registers to commute. This does happen.
    return nullptr;
  }

  if (CommutedMI) {
    swapSourceModifiers(MI, Src0, AMDGPU::OpName::src0_modifiers,
                        Src1, AMDGPU::OpName::src1_modifiers);

    CommutedMI->setDesc(get(CommutedOpcode));
  }

  return CommutedMI;
}

// This needs to be implemented because the source modifiers may be inserted
// between the true commutable operands, and the base
// TargetInstrInfo::commuteInstruction uses it.
bool SIInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {
  if (!MI.isCommutable())
    return false;

  unsigned Opc = MI.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  if (Src0Idx == -1)
    return false;

  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
  if (Src1Idx == -1)
    return false;

  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
}

bool SIInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
                                        int64_t BrOffset) const {
  // BranchRelaxation should never have to check s_setpc_b64 because its dest
  // block is unanalyzable.
  assert(BranchOp != AMDGPU::S_SETPC_B64);

  // Convert to dwords.
  BrOffset /= 4;

  // The branch instructions do PC += signext(SIMM16 * 4) + 4, so the offset is
  // from the next instruction.
  BrOffset -= 1;

  return isIntN(BranchOffsetBits, BrOffset);
}

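// Standalone sketch (not from the original file) of the range check above
// with the default 16-bit immediate width: the byte offset is converted to
// dwords, reduced by one because the hardware adds the offset to the address
// of the next instruction, and must then fit in a signed 16-bit field. The
// helper name is illustrative only.
static constexpr bool branchOffsetFitsIn16(int64_t BrOffsetBytes) {
  return (BrOffsetBytes / 4 - 1) >= -(1ll << 15) &&
         (BrOffsetBytes / 4 - 1) < (1ll << 15);
}
static_assert(branchOffsetFitsIn16(131072), "32768 dwords forward still fits");
static_assert(!branchOffsetFitsIn16(131076), "one dword further does not");
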
MachineBasicBlock *SIInstrInfo::getBranchDestBlock(
  const MachineInstr &MI) const {
  if (MI.getOpcode() == AMDGPU::S_SETPC_B64) {
    // This would be a difficult analysis to perform, but it can always be
    // legal so there's no need to analyze it.
    return nullptr;
  }

  return MI.getOperand(0).getMBB();
}

unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
                                           MachineBasicBlock &DestBB,
                                           const DebugLoc &DL,
                                           int64_t BrOffset,
                                           RegScavenger *RS) const {
  assert(RS && "RegScavenger required for long branching");
  assert(MBB.empty() &&
         "new block should be inserted for expanding unconditional branch");
  assert(MBB.pred_size() == 1);

  MachineFunction *MF = MBB.getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  // FIXME: Virtual register workaround for RegScavenger not working with empty
  // blocks.
  unsigned PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);

  auto I = MBB.end();

  // We need to compute the offset relative to the instruction immediately
  // after s_getpc_b64. Insert pc arithmetic code before last terminator.
  MachineInstr *GetPC = BuildMI(MBB, I, DL, get(AMDGPU::S_GETPC_B64), PCReg);

  // TODO: Handle > 32-bit block address.
  if (BrOffset >= 0) {
    BuildMI(MBB, I, DL, get(AMDGPU::S_ADD_U32))
      .addReg(PCReg, RegState::Define, AMDGPU::sub0)
      .addReg(PCReg, 0, AMDGPU::sub0)
      .addMBB(&DestBB, AMDGPU::TF_LONG_BRANCH_FORWARD);
    BuildMI(MBB, I, DL, get(AMDGPU::S_ADDC_U32))
      .addReg(PCReg, RegState::Define, AMDGPU::sub1)
      .addReg(PCReg, 0, AMDGPU::sub1)
      .addImm(0);
  } else {
    // Backwards branch.
    BuildMI(MBB, I, DL, get(AMDGPU::S_SUB_U32))
      .addReg(PCReg, RegState::Define, AMDGPU::sub0)
      .addReg(PCReg, 0, AMDGPU::sub0)
      .addMBB(&DestBB, AMDGPU::TF_LONG_BRANCH_BACKWARD);
    BuildMI(MBB, I, DL, get(AMDGPU::S_SUBB_U32))
      .addReg(PCReg, RegState::Define, AMDGPU::sub1)
      .addReg(PCReg, 0, AMDGPU::sub1)
      .addImm(0);
  }

  // Insert the indirect branch after the other terminator.
  BuildMI(&MBB, DL, get(AMDGPU::S_SETPC_B64))
    .addReg(PCReg);

  // FIXME: If spilling is necessary, this will fail because this scavenger has
  // no emergency stack slots. It is non-trivial to spill in this situation,
  // because the restore code needs to be specially placed after the
  // jump. BranchRelaxation then needs to be made aware of the newly inserted
  // block.
  //
  // If a spill is needed for the pc register pair, we need to insert a spill
  // restore block right before the destination block, and insert a short
  // branch into the old destination block's fallthrough predecessor.
  // e.g.:
  //
  // s_cbranch_scc0 skip_long_branch:
  //
  // long_branch_bb:
  //   spill s[8:9]
  //   s_getpc_b64 s[8:9]
  //   s_add_u32 s8, s8, restore_bb
  //   s_addc_u32 s9, s9, 0
  //   s_setpc_b64 s[8:9]
  //
  // skip_long_branch:
  //   foo;
  //
  // .....
  //
  // dest_bb_fallthrough_predecessor:
  //   bar;
  //   s_branch dest_bb
  //
  // restore_bb:
  //   restore s[8:9]
  //   fallthrough dest_bb
  //
  // dest_bb:
  //   buzz;

  RS->enterBasicBlockEnd(MBB);
  unsigned Scav = RS->scavengeRegister(&AMDGPU::SReg_64RegClass,
                                       MachineBasicBlock::iterator(GetPC), 0);
  MRI.replaceRegWith(PCReg, Scav);
  MRI.clearVirtRegs();
  RS->setRegUsed(Scav);

  return 4 + 8 + 4 + 4;
}

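// Standalone sketch (not from the original file) of the value returned above:
// the expansion is s_getpc_b64 (4 bytes), an s_add_u32/s_sub_u32 carrying a
// 32-bit literal for the block offset (8 bytes), s_addc_u32/s_subb_u32 with
// an inline 0 (4 bytes) and s_setpc_b64 (4 bytes), for 20 bytes in total.
static_assert(4 + 8 + 4 + 4 == 20, "size of the long-branch expansion");
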
unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
  switch (Cond) {
  case SIInstrInfo::SCC_TRUE:
    return AMDGPU::S_CBRANCH_SCC1;
  case SIInstrInfo::SCC_FALSE:
    return AMDGPU::S_CBRANCH_SCC0;
  case SIInstrInfo::VCCNZ:
    return AMDGPU::S_CBRANCH_VCCNZ;
  case SIInstrInfo::VCCZ:
    return AMDGPU::S_CBRANCH_VCCZ;
  case SIInstrInfo::EXECNZ:
    return AMDGPU::S_CBRANCH_EXECNZ;
  case SIInstrInfo::EXECZ:
    return AMDGPU::S_CBRANCH_EXECZ;
  default:
    llvm_unreachable("invalid branch predicate");
  }
}

SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::S_CBRANCH_SCC0:
    return SCC_FALSE;
  case AMDGPU::S_CBRANCH_SCC1:
    return SCC_TRUE;
  case AMDGPU::S_CBRANCH_VCCNZ:
    return VCCNZ;
  case AMDGPU::S_CBRANCH_VCCZ:
    return VCCZ;
  case AMDGPU::S_CBRANCH_EXECNZ:
    return EXECNZ;
  case AMDGPU::S_CBRANCH_EXECZ:
    return EXECZ;
  default:
    return INVALID_BR;
  }
}

bool SIInstrInfo::analyzeBranchImpl(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    MachineBasicBlock *&TBB,
                                    MachineBasicBlock *&FBB,
                                    SmallVectorImpl<MachineOperand> &Cond,
                                    bool AllowModify) const {
  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    // Unconditional Branch
    TBB = I->getOperand(0).getMBB();
    return false;
  }

  BranchPredicate Pred = getBranchPredicate(I->getOpcode());
  if (Pred == INVALID_BR)
    return true;

  MachineBasicBlock *CondBB = I->getOperand(0).getMBB();
  Cond.push_back(MachineOperand::CreateImm(Pred));
  Cond.push_back(I->getOperand(1)); // Save the branch register.

  ++I;

  if (I == MBB.end()) {
    // Conditional branch followed by fall-through.
    TBB = CondBB;
    return false;
  }

  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    TBB = CondBB;
    FBB = I->getOperand(0).getMBB();
    return false;
  }

  return true;
}

bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                                MachineBasicBlock *&FBB,
                                SmallVectorImpl<MachineOperand> &Cond,
                                bool AllowModify) const {
  MachineBasicBlock::iterator I = MBB.getFirstTerminator();
  if (I == MBB.end())
    return false;

  if (I->getOpcode() != AMDGPU::SI_MASK_BRANCH)
    return analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify);

  ++I;

  // TODO: Should be able to treat as fallthrough?
  if (I == MBB.end())
    return true;

  if (analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify))
    return true;

  MachineBasicBlock *MaskBrDest = I->getOperand(0).getMBB();

  // Specifically handle the case where the conditional branch is to the same
  // destination as the mask branch. e.g.
  //
  // si_mask_branch BB8
  // s_cbranch_execz BB8
  // s_cbranch BB9
  //
  // This is required to understand divergent loops which may need the branches
  // to be relaxed.
  if (TBB != MaskBrDest || Cond.empty())
    return true;

  auto Pred = Cond[0].getImm();
  return (Pred != EXECZ && Pred != EXECNZ);
}

unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB,
                                   int *BytesRemoved) const {
  MachineBasicBlock::iterator I = MBB.getFirstTerminator();

  unsigned Count = 0;
  unsigned RemovedSize = 0;
  while (I != MBB.end()) {
    MachineBasicBlock::iterator Next = std::next(I);
    if (I->getOpcode() == AMDGPU::SI_MASK_BRANCH) {
      I = Next;
      continue;
    }

    RemovedSize += getInstSizeInBytes(*I);
    I->eraseFromParent();
    ++Count;
    I = Next;
  }

  if (BytesRemoved)
    *BytesRemoved = RemovedSize;

  return Count;
}

unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB,
                                   MachineBasicBlock *TBB,
                                   MachineBasicBlock *FBB,
                                   ArrayRef<MachineOperand> Cond,
                                   const DebugLoc &DL,
                                   int *BytesAdded) const {

  if (!FBB && Cond.empty()) {
    BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
      .addMBB(TBB);
    if (BytesAdded)
      *BytesAdded = 4;
    return 1;
  }

  assert(TBB && Cond[0].isImm());

  unsigned Opcode
    = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));

  if (!FBB) {
    Cond[1].isUndef();
    MachineInstr *CondBr =
      BuildMI(&MBB, DL, get(Opcode))
        .addMBB(TBB);

    // Copy the flags onto the implicit condition register operand.
    MachineOperand &CondReg = CondBr->getOperand(1);
    CondReg.setIsUndef(Cond[1].isUndef());
    CondReg.setIsKill(Cond[1].isKill());

    if (BytesAdded)
      *BytesAdded = 4;
    return 1;
  }

  assert(TBB && FBB);

  MachineInstr *CondBr =
    BuildMI(&MBB, DL, get(Opcode))
      .addMBB(TBB);
  BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
    .addMBB(FBB);

  MachineOperand &CondReg = CondBr->getOperand(1);
  CondReg.setIsUndef(Cond[1].isUndef());
  CondReg.setIsKill(Cond[1].isKill());

  if (BytesAdded)
    *BytesAdded = 8;

  return 2;
}

bool SIInstrInfo::reverseBranchCondition(
  SmallVectorImpl<MachineOperand> &Cond) const {
  assert(Cond.size() == 2);
  Cond[0].setImm(-Cond[0].getImm());
  return false;
}

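// Standalone sketch (not from the original file) of the convention relied on
// above, assuming the BranchPredicate enumerators are defined so that each
// predicate and its logical opposite carry negated values (for example
// SCC_TRUE == 1 and SCC_FALSE == -1 in SIInstrInfo.h); reversing a condition
// is then just a sign flip of the immediate stored in Cond[0]. The helper
// name and values are illustrative of that assumption.
static constexpr int reverseBranchPredicate(int Pred) { return -Pred; }
static_assert(reverseBranchPredicate(1) == -1,
              "flipping the sign toggles the predicate");
static_assert(reverseBranchPredicate(reverseBranchPredicate(3)) == 3,
              "reversing twice restores the original predicate");
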
static void removeModOperands(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc,
                                              AMDGPU::OpName::src0_modifiers);
  int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc,
                                              AMDGPU::OpName::src1_modifiers);
  int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc,
                                              AMDGPU::OpName::src2_modifiers);

  MI.RemoveOperand(Src2ModIdx);
  MI.RemoveOperand(Src1ModIdx);
  MI.RemoveOperand(Src0ModIdx);
}

bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
                                unsigned Reg, MachineRegisterInfo *MRI) const {
  if (!MRI->hasOneNonDBGUse(Reg))
    return false;

  unsigned Opc = UseMI.getOpcode();
  if (Opc == AMDGPU::COPY) {
    bool isVGPRCopy = RI.isVGPR(*MRI, UseMI.getOperand(0).getReg());
    switch (DefMI.getOpcode()) {
    default:
      return false;
    case AMDGPU::S_MOV_B64:
      // TODO: We could fold 64-bit immediates, but this gets complicated
      // when there are sub-registers.
      return false;

    case AMDGPU::V_MOV_B32_e32:
    case AMDGPU::S_MOV_B32:
      break;
    }
    unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
    const MachineOperand *ImmOp = getNamedOperand(DefMI, AMDGPU::OpName::src0);
    assert(ImmOp);
    // FIXME: We could handle FrameIndex values here.
    if (!ImmOp->isImm()) {
      return false;
    }
    UseMI.setDesc(get(NewOpc));
    UseMI.getOperand(1).ChangeToImmediate(ImmOp->getImm());
    UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent());
    return true;
  }

  if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64 ||
      Opc == AMDGPU::V_MAD_F16 || Opc == AMDGPU::V_MAC_F16_e64) {
    bool IsF32 = Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64;

    // Don't fold if we are using source modifiers. The new VOP2 instructions
    // don't have them.
    if (hasModifiersSet(UseMI, AMDGPU::OpName::src0_modifiers) ||
        hasModifiersSet(UseMI, AMDGPU::OpName::src1_modifiers) ||
        hasModifiersSet(UseMI, AMDGPU::OpName::src2_modifiers)) {
      return false;
    }

    const MachineOperand &ImmOp = DefMI.getOperand(1);

    // If this is a free constant, there's no reason to do this.
    // TODO: We could fold this here instead of letting SIFoldOperands do it
    // later.
    MachineOperand *Src0 = getNamedOperand(UseMI, AMDGPU::OpName::src0);

    // Any src operand can be used for the legality check.
    if (isInlineConstant(UseMI, *Src0, ImmOp))
      return false;

    MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1);
    MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2);

    // Multiplied part is the constant: Use v_madmk_{f16, f32}.
    // We should only expect these to be on src0 due to canonicalizations.
    if (Src0->isReg() && Src0->getReg() == Reg) {
      if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))
        return false;

      if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))
        return false;

      // We need to swap operands 0 and 1 since madmk constant is at operand 1.

      const int64_t Imm = DefMI.getOperand(1).getImm();

      // FIXME: This would be a lot easier if we could return a new instruction
      // instead of having to modify in place.

      // Remove these first since they are at the end.
      UseMI.RemoveOperand(
        AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod));
      UseMI.RemoveOperand(
        AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp));

      unsigned Src1Reg = Src1->getReg();
      unsigned Src1SubReg = Src1->getSubReg();
      Src0->setReg(Src1Reg);
      Src0->setSubReg(Src1SubReg);
      Src0->setIsKill(Src1->isKill());

      if (Opc == AMDGPU::V_MAC_F32_e64 ||
          Opc == AMDGPU::V_MAC_F16_e64)
        UseMI.untieRegOperand(
          AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

      Src1->ChangeToImmediate(Imm);

      removeModOperands(UseMI);
      UseMI.setDesc(get(IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16));

      bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
      if (DeleteDef)
        DefMI.eraseFromParent();

      return true;
    }

    // Added part is the constant: Use v_madak_{f16, f32}.
    if (Src2->isReg() && Src2->getReg() == Reg) {
      // Not allowed to use constant bus for another operand.
      // We can however allow an inline immediate as src0.
      if (!Src0->isImm() &&
          (Src0->isReg() && RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))
        return false;

      if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))
        return false;

      const int64_t Imm = DefMI.getOperand(1).getImm();

      // FIXME: This would be a lot easier if we could return a new instruction
      // instead of having to modify in place.

      // Remove these first since they are at the end.
      UseMI.RemoveOperand(
        AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod));
      UseMI.RemoveOperand(
        AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp));

      if (Opc == AMDGPU::V_MAC_F32_e64 ||
          Opc == AMDGPU::V_MAC_F16_e64)
        UseMI.untieRegOperand(
          AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

      // ChangingToImmediate adds Src2 back to the instruction.
      Src2->ChangeToImmediate(Imm);

      // These come before src2.
      removeModOperands(UseMI);
      UseMI.setDesc(get(IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16));

      bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
      if (DeleteDef)
        DefMI.eraseFromParent();

      return true;
    }
  }

  return false;
}

static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
                                int WidthB, int OffsetB) {
  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
  return LowOffset + LowWidth <= HighOffset;
}

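// Standalone sketch (not from the original file) of the interval test above:
// two accesses are disjoint when the lower one ends at or before the higher
// one starts. The helper assumes positive widths; its name and values are
// illustrative only.
static constexpr bool disjointAccesses(int WidthA, int OffsetA,
                                       int WidthB, int OffsetB) {
  return OffsetA < OffsetB ? OffsetA + WidthA <= OffsetB
                           : OffsetB + WidthB <= OffsetA;
}
static_assert(disjointAccesses(4, 0, 4, 4), "[0,4) and [4,8) do not overlap");
static_assert(!disjointAccesses(8, 0, 4, 4), "[0,8) covers [4,8)");
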
bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr &MIa,
                                               MachineInstr &MIb) const {
  unsigned BaseReg0, BaseReg1;
  int64_t Offset0, Offset1;

  if (getMemOpBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) &&
      getMemOpBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) {

    if (!MIa.hasOneMemOperand() || !MIb.hasOneMemOperand()) {
      // FIXME: Handle ds_read2 / ds_write2.
      return false;
    }
    unsigned Width0 = (*MIa.memoperands_begin())->getSize();
    unsigned Width1 = (*MIb.memoperands_begin())->getSize();
    if (BaseReg0 == BaseReg1 &&
        offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) {
      return true;
    }
  }

  return false;
}

bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr &MIa,
                                                  MachineInstr &MIb,
                                                  AliasAnalysis *AA) const {
  assert((MIa.mayLoad() || MIa.mayStore()) &&
         "MIa must load from or modify a memory location");
  assert((MIb.mayLoad() || MIb.mayStore()) &&
         "MIb must load from or modify a memory location");

  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects())
    return false;

  // XXX - Can we relax this between address spaces?
  if (MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
    return false;

  if (AA && MIa.hasOneMemOperand() && MIb.hasOneMemOperand()) {
    const MachineMemOperand *MMOa = *MIa.memoperands_begin();
    const MachineMemOperand *MMOb = *MIb.memoperands_begin();
    if (MMOa->getValue() && MMOb->getValue()) {
      MemoryLocation LocA(MMOa->getValue(), MMOa->getSize(), MMOa->getAAInfo());
      MemoryLocation LocB(MMOb->getValue(), MMOb->getSize(), MMOb->getAAInfo());
      if (!AA->alias(LocA, LocB))
        return true;
    }
  }

  // TODO: Should we check the address space from the MachineMemOperand? That
  // would allow us to distinguish objects we know don't alias based on the
  // underlying address space, even if it was lowered to a different one,
  // e.g. private accesses lowered to use MUBUF instructions on a scratch
  // buffer.
  if (isDS(MIa)) {
    if (isDS(MIb))
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return !isFLAT(MIb);
  }

  if (isMUBUF(MIa) || isMTBUF(MIa)) {
    if (isMUBUF(MIb) || isMTBUF(MIb))
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return !isFLAT(MIb) && !isSMRD(MIb);
  }

  if (isSMRD(MIa)) {
    if (isSMRD(MIb))
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return !isFLAT(MIb) && !isMUBUF(MIa) && !isMTBUF(MIa);
  }

  if (isFLAT(MIa)) {
    if (isFLAT(MIb))
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return false;
  }

  return false;
}

MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
                                                 MachineInstr &MI,
                                                 LiveVariables *LV) const {
  bool IsF16 = false;

  switch (MI.getOpcode()) {
  default:
    return nullptr;
  case AMDGPU::V_MAC_F16_e64:
    IsF16 = true;
  case AMDGPU::V_MAC_F32_e64:
    break;
  case AMDGPU::V_MAC_F16_e32:
    IsF16 = true;
  case AMDGPU::V_MAC_F32_e32: {
    int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::src0);
    const MachineOperand *Src0 = &MI.getOperand(Src0Idx);
    if (Src0->isImm() && !isInlineConstant(MI, Src0Idx, *Src0))
      return nullptr;
    break;
  }
  }

  const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst);
  const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0);
  const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
  const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);

  return BuildMI(*MBB, MI, MI.getDebugLoc(),
                 get(IsF16 ? AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32))
      .addOperand(*Dst)
      .addImm(0) // Src0 mods
      .addOperand(*Src0)
      .addImm(0) // Src1 mods
      .addOperand(*Src1)
      .addImm(0) // Src mods
      .addOperand(*Src2)
      .addImm(0)  // clamp
      .addImm(0); // omod
}

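// Illustrative example (not from the original file) of the rewrite performed
// above, shown as assembly for clarity. The two-address multiply-accumulate
//
//   v_mac_f32_e32 v0, v1, v2      ; v0 <- v1 * v2 + v0 (v0 is the tied src2)
//
// becomes the explicit three-address form
//
//   v_mad_f32 v0, v1, v2, v0
//
// where the accumulator operand (tied to the destination on the MAC) is
// passed as the MAD's explicit src2, and zeroed source-modifier, clamp and
// omod operands are appended as in the BuildMI call above.
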
// It's not generally safe to move VALU instructions across these since it will
// start using the register as a base index rather than directly.
// XXX - Why isn't hasSideEffects sufficient for these?
static bool changesVGPRIndexingMode(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case AMDGPU::S_SET_GPR_IDX_ON:
  case AMDGPU::S_SET_GPR_IDX_MODE:
  case AMDGPU::S_SET_GPR_IDX_OFF:
    return true;
  default:
    return false;
  }
}

bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
                                       const MachineBasicBlock *MBB,
                                       const MachineFunction &MF) const {
  // XXX - Do we want the SP check in the base implementation?

  // Target-independent instructions do not have an implicit-use of EXEC, even
  // when they operate on VGPRs. Treating EXEC modifications as scheduling
  // boundaries prevents incorrect movements of such instructions.
  return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF) ||
         MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
         MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
         MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
         changesVGPRIndexingMode(MI);
}

bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
  switch (Imm.getBitWidth()) {
  case 32:
    return AMDGPU::isInlinableLiteral32(Imm.getSExtValue(),
                                        ST.hasInv2PiInlineImm());
  case 64:
    return AMDGPU::isInlinableLiteral64(Imm.getSExtValue(),
                                        ST.hasInv2PiInlineImm());
  case 16:
    return AMDGPU::isInlinableLiteral16(Imm.getSExtValue(),
                                        ST.hasInv2PiInlineImm());
  default:
    llvm_unreachable("invalid bitwidth");
  }
}

bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
                                   uint8_t OperandType) const {
  if (!MO.isImm() || OperandType < MCOI::OPERAND_FIRST_TARGET)
    return false;

  // MachineOperand provides no way to tell the true operand size, since it
  // only records a 64-bit value. We need to know the size to determine if a
  // 32-bit floating point immediate bit pattern is legal for an integer
  // immediate. It would be for any 32-bit integer operand, but would not be
  // for a 64-bit one.

  int64_t Imm = MO.getImm();
  switch (operandBitWidth(OperandType)) {
  case 32: {
    int32_t Trunc = static_cast<int32_t>(Imm);
    return Trunc == Imm &&
           AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm());
  }
  case 64: {
    return AMDGPU::isInlinableLiteral64(MO.getImm(),
                                        ST.hasInv2PiInlineImm());
  }
  case 16: {
    if (isInt<16>(Imm) || isUInt<16>(Imm)) {
      int16_t Trunc = static_cast<int16_t>(Imm);
      return AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
    }

    return false;
  }
  default:
    llvm_unreachable("invalid bitwidth");
  }
}

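// Standalone sketch (not from the original file) of the width check in the
// 32-bit case above: the 64-bit MachineOperand value only qualifies when
// truncating it to 32 bits loses nothing; whether the surviving value is an
// inline constant is then decided by AMDGPU::isInlinableLiteral32. The helper
// name and values are illustrative only.
static constexpr bool survives32BitTruncation(int64_t Imm) {
  return static_cast<int32_t>(Imm) == Imm;
}
static_assert(survives32BitTruncation(64), "small integers are unchanged");
static_assert(survives32BitTruncation(-1), "sign-extended -1 round-trips");
static_assert(!survives32BitTruncation(int64_t(1) << 32),
              "values needing more than 32 bits are rejected");
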
1732  const MCOperandInfo &OpInfo) const {
1733  switch (MO.getType()) {
1735  return false;
1737  return !isInlineConstant(MO, OpInfo);
1743  return true;
1744  default:
1745  llvm_unreachable("unexpected operand type");
1746  }
1747 }
1748 
1749 static bool compareMachineOp(const MachineOperand &Op0,
1750  const MachineOperand &Op1) {
1751  if (Op0.getType() != Op1.getType())
1752  return false;
1753 
1754  switch (Op0.getType()) {
1756  return Op0.getReg() == Op1.getReg();
1758  return Op0.getImm() == Op1.getImm();
1759  default:
1760  llvm_unreachable("Didn't expect to be comparing these operand types");
1761  }
1762 }
1763 
1765  const MachineOperand &MO) const {
1766  const MCOperandInfo &OpInfo = get(MI.getOpcode()).OpInfo[OpNo];
1767 
1768  assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
1769 
1770  if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE)
1771  return true;
1772 
1773  if (OpInfo.RegClass < 0)
1774  return false;
1775 
1776  if (MO.isImm() && isInlineConstant(MO, OpInfo))
1777  return RI.opCanUseInlineConstant(OpInfo.OperandType);
1778 
1779  return RI.opCanUseLiteralConstant(OpInfo.OperandType);
1780 }
1781 
1782 bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
1783  int Op32 = AMDGPU::getVOPe32(Opcode);
1784  if (Op32 == -1)
1785  return false;
1786 
1787  return pseudoToMCOpcode(Op32) != -1;
1788 }
1789 
1790 bool SIInstrInfo::hasModifiers(unsigned Opcode) const {
1791  // The src0_modifier operand is present on all instructions
1792  // that have modifiers.
1793 
1794  return AMDGPU::getNamedOperandIdx(Opcode,
1795  AMDGPU::OpName::src0_modifiers) != -1;
1796 }
1797 
1799  unsigned OpName) const {
1800  const MachineOperand *Mods = getNamedOperand(MI, OpName);
1801  return Mods && Mods->getImm();
1802 }
1803 
1805  const MachineOperand &MO,
1806  const MCOperandInfo &OpInfo) const {
1807  // Literal constants use the constant bus.
1808  //if (isLiteralConstantLike(MO, OpInfo))
1809  // return true;
1810  if (MO.isImm())
1811  return !isInlineConstant(MO, OpInfo);
1812 
1813  if (!MO.isReg())
1814  return true; // Misc other operands like FrameIndex
1815 
1816  if (!MO.isUse())
1817  return false;
1818 
1820  return RI.isSGPRClass(MRI.getRegClass(MO.getReg()));
1821 
1822  // FLAT_SCR is just an SGPR pair.
1823  if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR))
1824  return true;
1825 
1826  // EXEC register uses the constant bus.
1827  if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
1828  return true;
1829 
1830  // SGPRs use the constant bus
1831  return (MO.getReg() == AMDGPU::VCC || MO.getReg() == AMDGPU::M0 ||
1832  (!MO.isImplicit() &&
1833  (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
1834  AMDGPU::SGPR_64RegClass.contains(MO.getReg()))));
1835 }
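// As modeled here, an SGPR or literal source occupies the single constant bus
// slot, e.g. s0 in "v_add_f32 v0, s0, v1" or the literal in
// "v_add_f32 v0, 0x40490fdb, v1", while inline constants and VGPRs do not.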
1836 
1837 static unsigned findImplicitSGPRRead(const MachineInstr &MI) {
1838  for (const MachineOperand &MO : MI.implicit_operands()) {
1839  // We only care about reads.
1840  if (MO.isDef())
1841  continue;
1842 
1843  switch (MO.getReg()) {
1844  case AMDGPU::VCC:
1845  case AMDGPU::M0:
1846  case AMDGPU::FLAT_SCR:
1847  return MO.getReg();
1848 
1849  default:
1850  break;
1851  }
1852  }
1853 
1854  return AMDGPU::NoRegister;
1855 }
1856 
1857 static bool shouldReadExec(const MachineInstr &MI) {
1858  if (SIInstrInfo::isVALU(MI)) {
1859  switch (MI.getOpcode()) {
1860  case AMDGPU::V_READLANE_B32:
1861  case AMDGPU::V_READLANE_B32_si:
1862  case AMDGPU::V_READLANE_B32_vi:
1863  case AMDGPU::V_WRITELANE_B32:
1864  case AMDGPU::V_WRITELANE_B32_si:
1865  case AMDGPU::V_WRITELANE_B32_vi:
1866  return false;
1867  }
1868 
1869  return true;
1870  }
1871 
1872  if (SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||
1873  SIInstrInfo::isSALU(MI) ||
1874  SIInstrInfo::isSMRD(MI))
1875  return false;
1876 
1877  return true;
1878 }
1879 
1880 static bool isSubRegOf(const SIRegisterInfo &TRI,
1881  const MachineOperand &SuperVec,
1882  const MachineOperand &SubReg) {
1883  if (TargetRegisterInfo::isVirtualRegister(SubReg.getReg()))
1884  return TRI.isSubRegister(SuperVec.getReg(), SubReg.getReg());
1885 
1886  return SubReg.getSubReg() != AMDGPU::NoSubRegister &&
1887  SubReg.getReg() == SuperVec.getReg();
1888 }
1889 
1890 bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
1891  StringRef &ErrInfo) const {
1892  uint16_t Opcode = MI.getOpcode();
1893  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
1894  int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
1895  int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
1896  int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
1897 
1898  // Make sure the number of operands is correct.
1899  const MCInstrDesc &Desc = get(Opcode);
1900  if (!Desc.isVariadic() &&
1901  Desc.getNumOperands() != MI.getNumExplicitOperands()) {
1902  ErrInfo = "Instruction has wrong number of operands.";
1903  return false;
1904  }
1905 
1906  if (MI.isInlineAsm()) {
1907  // Verify register classes for inlineasm constraints.
1908  for (unsigned I = InlineAsm::MIOp_FirstOperand, E = MI.getNumOperands();
1909  I != E; ++I) {
1910  const TargetRegisterClass *RC = MI.getRegClassConstraint(I, this, &RI);
1911  if (!RC)
1912  continue;
1913 
1914  const MachineOperand &Op = MI.getOperand(I);
1915  if (!Op.isReg())
1916  continue;
1917 
1918  unsigned Reg = Op.getReg();
1919  if (!TargetRegisterInfo::isVirtualRegister(Reg) && !RC->contains(Reg)) {
1920  ErrInfo = "inlineasm operand has incorrect register class.";
1921  return false;
1922  }
1923  }
1924 
1925  return true;
1926  }
1927 
1928  // Make sure the register classes are correct.
1929  for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
1930  if (MI.getOperand(i).isFPImm()) {
1931  ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
1932  "all fp values to integers.";
1933  return false;
1934  }
1935 
1936  int RegClass = Desc.OpInfo[i].RegClass;
1937 
1938  switch (Desc.OpInfo[i].OperandType) {
1939  case MCOI::OPERAND_REGISTER:
1940  if (MI.getOperand(i).isImm()) {
1941  ErrInfo = "Illegal immediate value for operand.";
1942  return false;
1943  }
1944  break;
1945  case AMDGPU::OPERAND_REG_IMM_INT32:
1946  case AMDGPU::OPERAND_REG_IMM_FP32:
1947  break;
1948  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1949  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1950  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1951  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1952  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1953  case AMDGPU::OPERAND_REG_INLINE_C_FP16: {
1954  const MachineOperand &MO = MI.getOperand(i);
1955  if (!MO.isReg() && (!MO.isImm() || !isInlineConstant(MI, i))) {
1956  ErrInfo = "Illegal immediate value for operand.";
1957  return false;
1958  }
1959  break;
1960  }
1961  case MCOI::OPERAND_IMMEDIATE:
1962  case AMDGPU::OPERAND_KIMM32:
1963  // Check if this operand is an immediate.
1964  // FrameIndex operands will be replaced by immediates, so they are
1965  // allowed.
1966  if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) {
1967  ErrInfo = "Expected immediate, but got non-immediate";
1968  return false;
1969  }
1970  LLVM_FALLTHROUGH;
1971  default:
1972  continue;
1973  }
1974 
1975  if (!MI.getOperand(i).isReg())
1976  continue;
1977 
1978  if (RegClass != -1) {
1979  unsigned Reg = MI.getOperand(i).getReg();
1980  if (Reg == AMDGPU::NoRegister ||
1981  TargetRegisterInfo::isVirtualRegister(Reg))
1982  continue;
1983 
1984  const TargetRegisterClass *RC = RI.getRegClass(RegClass);
1985  if (!RC->contains(Reg)) {
1986  ErrInfo = "Operand has incorrect register class.";
1987  return false;
1988  }
1989  }
1990  }
1991 
1992  // Verify VOP*
1993  if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isVOPC(MI)) {
1994  // Only look at the true operands. Only a real operand can use the constant
1995  // bus, and we don't want to check pseudo-operands like the source modifier
1996  // flags.
1997  const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
1998 
1999  unsigned ConstantBusCount = 0;
2000 
2001  if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1)
2002  ++ConstantBusCount;
2003 
2004  unsigned SGPRUsed = findImplicitSGPRRead(MI);
2005  if (SGPRUsed != AMDGPU::NoRegister)
2006  ++ConstantBusCount;
2007 
2008  for (int OpIdx : OpIndices) {
2009  if (OpIdx == -1)
2010  break;
2011  const MachineOperand &MO = MI.getOperand(OpIdx);
2012  if (usesConstantBus(MRI, MO, MI.getDesc().OpInfo[OpIdx])) {
2013  if (MO.isReg()) {
2014  if (MO.getReg() != SGPRUsed)
2015  ++ConstantBusCount;
2016  SGPRUsed = MO.getReg();
2017  } else {
2018  ++ConstantBusCount;
2019  }
2020  }
2021  }
2022  if (ConstantBusCount > 1) {
2023  ErrInfo = "VOP* instruction uses the constant bus more than once";
2024  return false;
2025  }
2026  }
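// For example, a VOP3 such as V_FMA_F32 reading two different SGPRs, or a
// VOP2 reading both an SGPR and a literal, would be rejected by the check
// above.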
2027 
2028  // Verify misc. restrictions on specific instructions.
2029  if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
2030  Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
2031  const MachineOperand &Src0 = MI.getOperand(Src0Idx);
2032  const MachineOperand &Src1 = MI.getOperand(Src1Idx);
2033  const MachineOperand &Src2 = MI.getOperand(Src2Idx);
2034  if (Src0.isReg() && Src1.isReg() && Src2.isReg()) {
2035  if (!compareMachineOp(Src0, Src1) &&
2036  !compareMachineOp(Src0, Src2)) {
2037  ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
2038  return false;
2039  }
2040  }
2041  }
2042 
2043  if (isSOPK(MI)) {
2044  int64_t Imm = getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm();
2045  if (sopkIsZext(MI)) {
2046  if (!isUInt<16>(Imm)) {
2047  ErrInfo = "invalid immediate for SOPK instruction";
2048  return false;
2049  }
2050  } else {
2051  if (!isInt<16>(Imm)) {
2052  ErrInfo = "invalid immediate for SOPK instruction";
2053  return false;
2054  }
2055  }
2056  }
2057 
2058  if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
2059  Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
2060  Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
2061  Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
2062  const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
2063  Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
2064 
2065  const unsigned StaticNumOps = Desc.getNumOperands() +
2066  Desc.getNumImplicitUses();
2067  const unsigned NumImplicitOps = IsDst ? 2 : 1;
2068 
2069  // Allow additional implicit operands. This allows a fixup done by the post
2070  // RA scheduler where the main implicit operand is killed and implicit-defs
2071  // are added for sub-registers that remain live after this instruction.
2072  if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
2073  ErrInfo = "missing implicit register operands";
2074  return false;
2075  }
2076 
2077  const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst);
2078  if (IsDst) {
2079  if (!Dst->isUse()) {
2080  ErrInfo = "v_movreld_b32 vdst should be a use operand";
2081  return false;
2082  }
2083 
2084  unsigned UseOpIdx;
2085  if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
2086  UseOpIdx != StaticNumOps + 1) {
2087  ErrInfo = "movrel implicit operands should be tied";
2088  return false;
2089  }
2090  }
2091 
2092  const MachineOperand &Src0 = MI.getOperand(Src0Idx);
2093  const MachineOperand &ImpUse
2094  = MI.getOperand(StaticNumOps + NumImplicitOps - 1);
2095  if (!ImpUse.isReg() || !ImpUse.isUse() ||
2096  !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
2097  ErrInfo = "src0 should be subreg of implicit vector use";
2098  return false;
2099  }
2100  }
2101 
2102  // Make sure we aren't losing exec uses in the td files. This mostly requires
2103  // being careful when using let Uses to try to add other use registers.
2104  if (shouldReadExec(MI)) {
2105  if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
2106  ErrInfo = "VALU instruction does not implicitly read exec mask";
2107  return false;
2108  }
2109  }
2110 
2111  if (isSMRD(MI)) {
2112  if (MI.mayStore()) {
2113  // The register offset form of scalar stores may only use m0 as the
2114  // soffset register.
2115  const MachineOperand *Soff = getNamedOperand(MI, AMDGPU::OpName::soff);
2116  if (Soff && Soff->getReg() != AMDGPU::M0) {
2117  ErrInfo = "scalar stores must use m0 as offset register";
2118  return false;
2119  }
2120  }
2121  }
2122 
2123  return true;
2124 }
2125 
2126 unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
2127  switch (MI.getOpcode()) {
2128  default: return AMDGPU::INSTRUCTION_LIST_END;
2129  case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
2130  case AMDGPU::COPY: return AMDGPU::COPY;
2131  case AMDGPU::PHI: return AMDGPU::PHI;
2132  case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
2133  case AMDGPU::S_MOV_B32:
2134  return MI.getOperand(1).isReg() ?
2135  AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
2136  case AMDGPU::S_ADD_I32:
2137  case AMDGPU::S_ADD_U32: return AMDGPU::V_ADD_I32_e32;
2138  case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32;
2139  case AMDGPU::S_SUB_I32:
2140  case AMDGPU::S_SUB_U32: return AMDGPU::V_SUB_I32_e32;
2141  case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
2142  case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32;
2143  case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
2144  case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
2145  case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
2146  case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64;
2147  case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64;
2148  case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64;
2149  case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64;
2150  case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
2151  case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
2152  case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
2153  case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
2154  case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
2155  case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
2156  case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32;
2157  case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
2158  case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
2159  case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
2160  case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
2161  case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
2162  case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
2163  case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
2164  case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
2165  case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
2166  case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
2167  case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32;
2168  case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32;
2169  case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32;
2170  case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e32;
2171  case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e32;
2172  case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e32;
2173  case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e32;
2174  case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e32;
2175  case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e32;
2176  case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e32;
2177  case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e32;
2178  case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
2179  case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
2180  case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
2181  case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
2182  case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ;
2183  case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ;
2184  }
2185 }
2186 
2187 bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const {
2188  return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END;
2189 }
2190 
2191 const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
2192  unsigned OpNo) const {
2193  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2194  const MCInstrDesc &Desc = get(MI.getOpcode());
2195  if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
2196  Desc.OpInfo[OpNo].RegClass == -1) {
2197  unsigned Reg = MI.getOperand(OpNo).getReg();
2198 
2199  if (TargetRegisterInfo::isVirtualRegister(Reg))
2200  return MRI.getRegClass(Reg);
2201  return RI.getPhysRegClass(Reg);
2202  }
2203 
2204  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
2205  return RI.getRegClass(RCID);
2206 }
2207 
2208 bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
2209  switch (MI.getOpcode()) {
2210  case AMDGPU::COPY:
2211  case AMDGPU::REG_SEQUENCE:
2212  case AMDGPU::PHI:
2213  case AMDGPU::INSERT_SUBREG:
2214  return RI.hasVGPRs(getOpRegClass(MI, 0));
2215  default:
2216  return RI.hasVGPRs(getOpRegClass(MI, OpNo));
2217  }
2218 }
2219 
2220 void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const {
2221  MachineBasicBlock::iterator I = MI;
2222  MachineBasicBlock *MBB = MI.getParent();
2223  MachineOperand &MO = MI.getOperand(OpIdx);
2224  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
2225  unsigned RCID = get(MI.getOpcode()).OpInfo[OpIdx].RegClass;
2226  const TargetRegisterClass *RC = RI.getRegClass(RCID);
2227  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
2228  if (MO.isReg())
2229  Opcode = AMDGPU::COPY;
2230  else if (RI.isSGPRClass(RC))
2231  Opcode = AMDGPU::S_MOV_B32;
2232 
2233  const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
2234  if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC))
2235  VRC = &AMDGPU::VReg_64RegClass;
2236  else
2237  VRC = &AMDGPU::VGPR_32RegClass;
2238 
2239  unsigned Reg = MRI.createVirtualRegister(VRC);
2240  DebugLoc DL = MBB->findDebugLoc(I);
2241  BuildMI(*MI.getParent(), I, DL, get(Opcode), Reg).addOperand(MO);
2242  MO.ChangeToRegister(Reg, false);
2243 }
2244 
2245 unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
2246  MachineRegisterInfo &MRI,
2247  MachineOperand &SuperReg,
2248  const TargetRegisterClass *SuperRC,
2249  unsigned SubIdx,
2250  const TargetRegisterClass *SubRC)
2251  const {
2252  MachineBasicBlock *MBB = MI->getParent();
2253  DebugLoc DL = MI->getDebugLoc();
2254  unsigned SubReg = MRI.createVirtualRegister(SubRC);
2255 
2256  if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) {
2257  BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
2258  .addReg(SuperReg.getReg(), 0, SubIdx);
2259  return SubReg;
2260  }
2261 
2262  // Just in case the super register is itself a sub-register, copy it to a new
2263  // value so we don't need to worry about merging its subreg index with the
2264  // SubIdx passed to this function. The register coalescer should be able to
2265  // eliminate this extra copy.
2266  unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
2267 
2268  BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg)
2269  .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg());
2270 
2271  BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
2272  .addReg(NewSuperReg, 0, SubIdx);
2273 
2274  return SubReg;
2275 }
2276 
2277 MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
2278  MachineBasicBlock::iterator MII,
2279  MachineRegisterInfo &MRI,
2280  MachineOperand &Op,
2281  const TargetRegisterClass *SuperRC,
2282  unsigned SubIdx,
2283  const TargetRegisterClass *SubRC) const {
2284  if (Op.isImm()) {
2285  if (SubIdx == AMDGPU::sub0)
2286  return MachineOperand::CreateImm(static_cast<int32_t>(Op.getImm()));
2287  if (SubIdx == AMDGPU::sub1)
2288  return MachineOperand::CreateImm(static_cast<int32_t>(Op.getImm() >> 32));
2289 
2290  llvm_unreachable("Unhandled register index for immediate");
2291  }
2292 
2293  unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC,
2294  SubIdx, SubRC);
2295  return MachineOperand::CreateReg(SubReg, false);
2296 }
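// For example, splitting the 64-bit immediate 0x100000002 produces the
// immediate 2 for sub0 and 1 for sub1.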
2297 
2298 // Change the order of operands from (0, 1, 2) to (0, 2, 1)
2299 void SIInstrInfo::swapOperands(MachineInstr &Inst) const {
2300  assert(Inst.getNumExplicitOperands() == 3);
2301  MachineOperand Op1 = Inst.getOperand(1);
2302  Inst.RemoveOperand(1);
2303  Inst.addOperand(Op1);
2304 }
2305 
2306 bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI,
2307  const MCOperandInfo &OpInfo,
2308  const MachineOperand &MO) const {
2309  if (!MO.isReg())
2310  return false;
2311 
2312  unsigned Reg = MO.getReg();
2313  const TargetRegisterClass *RC =
2314  TargetRegisterInfo::isVirtualRegister(Reg) ?
2315  MRI.getRegClass(Reg) :
2316  RI.getPhysRegClass(Reg);
2317 
2318  const SIRegisterInfo *TRI =
2319  static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
2320  RC = TRI->getSubRegClass(RC, MO.getSubReg());
2321 
2322  // In order to be legal, the common sub-class must be equal to the
2323  // class of the current operand. For example:
2324  //
2325  // v_mov_b32 s0 ; Operand defined as vsrc_b32
2326  // ; RI.getCommonSubClass(s0,vsrc_b32) = sgpr ; LEGAL
2327  //
2328  // s_sendmsg 0, s0 ; Operand defined as m0reg
2329  // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
2330 
2331  return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
2332 }
2333 
2334 bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
2335  const MCOperandInfo &OpInfo,
2336  const MachineOperand &MO) const {
2337  if (MO.isReg())
2338  return isLegalRegOperand(MRI, OpInfo, MO);
2339 
2340  // Handle non-register types that are treated like immediates.
2341  assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
2342  return true;
2343 }
2344 
2345 bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
2346  const MachineOperand *MO) const {
2347  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2348  const MCInstrDesc &InstDesc = MI.getDesc();
2349  const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
2350  const TargetRegisterClass *DefinedRC =
2351  OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr;
2352  if (!MO)
2353  MO = &MI.getOperand(OpIdx);
2354 
2355  if (isVALU(MI) && usesConstantBus(MRI, *MO, OpInfo)) {
2356 
2357  RegSubRegPair SGPRUsed;
2358  if (MO->isReg())
2359  SGPRUsed = RegSubRegPair(MO->getReg(), MO->getSubReg());
2360 
2361  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
2362  if (i == OpIdx)
2363  continue;
2364  const MachineOperand &Op = MI.getOperand(i);
2365  if (Op.isReg()) {
2366  if ((Op.getReg() != SGPRUsed.Reg || Op.getSubReg() != SGPRUsed.SubReg) &&
2367  usesConstantBus(MRI, Op, InstDesc.OpInfo[i])) {
2368  return false;
2369  }
2370  } else if (InstDesc.OpInfo[i].OperandType == AMDGPU::OPERAND_KIMM32) {
2371  return false;
2372  }
2373  }
2374  }
2375 
2376  if (MO->isReg()) {
2377  assert(DefinedRC);
2378  return isLegalRegOperand(MRI, OpInfo, *MO);
2379  }
2380 
2381  // Handle non-register types that are treated like immediates.
2382  assert(MO->isImm() || MO->isTargetIndex() || MO->isFI());
2383 
2384  if (!DefinedRC) {
2385  // This operand expects an immediate.
2386  return true;
2387  }
2388 
2389  return isImmOperandLegal(MI, OpIdx, *MO);
2390 }
2391 
2392 void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
2393  MachineInstr &MI) const {
2394  unsigned Opc = MI.getOpcode();
2395  const MCInstrDesc &InstrDesc = get(Opc);
2396 
2397  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
2398  MachineOperand &Src1 = MI.getOperand(Src1Idx);
2399 
2400  // If there is an implicit SGPR use such as VCC use for v_addc_u32/v_subb_u32
2401  // we need to only have one constant bus use.
2402  //
2403  // Note we do not need to worry about literal constants here. They are
2404  // disabled for the operand type for instructions because they will always
2405  // violate the one constant bus use rule.
2406  bool HasImplicitSGPR = findImplicitSGPRRead(MI) != AMDGPU::NoRegister;
2407  if (HasImplicitSGPR) {
2408  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
2409  MachineOperand &Src0 = MI.getOperand(Src0Idx);
2410 
2411  if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg()))
2412  legalizeOpWithMove(MI, Src0Idx);
2413  }
2414 
2415  // VOP2 src0 instructions support all operand types, so we don't need to check
2416  // their legality. If src1 is already legal, we don't need to do anything.
2417  if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1))
2418  return;
2419 
2420  // We do not use commuteInstruction here because it is too aggressive and will
2421  // commute if it is possible. We only want to commute here if it improves
2422  // legality. This can be called a fairly large number of times so don't waste
2423  // compile time pointlessly swapping and checking legality again.
2424  if (HasImplicitSGPR || !MI.isCommutable()) {
2425  legalizeOpWithMove(MI, Src1Idx);
2426  return;
2427  }
2428 
2429  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
2430  MachineOperand &Src0 = MI.getOperand(Src0Idx);
2431 
2432  // If src0 can be used as src1, commuting will make the operands legal.
2433  // Otherwise we have to give up and insert a move.
2434  //
2435  // TODO: Other immediate-like operand kinds could be commuted if there was a
2436  // MachineOperand::ChangeTo* for them.
2437  if ((!Src1.isImm() && !Src1.isReg()) ||
2438  !isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0)) {
2439  legalizeOpWithMove(MI, Src1Idx);
2440  return;
2441  }
2442 
2443  int CommutedOpc = commuteOpcode(MI);
2444  if (CommutedOpc == -1) {
2445  legalizeOpWithMove(MI, Src1Idx);
2446  return;
2447  }
2448 
2449  MI.setDesc(get(CommutedOpc));
2450 
2451  unsigned Src0Reg = Src0.getReg();
2452  unsigned Src0SubReg = Src0.getSubReg();
2453  bool Src0Kill = Src0.isKill();
2454 
2455  if (Src1.isImm())
2456  Src0.ChangeToImmediate(Src1.getImm());
2457  else if (Src1.isReg()) {
2458  Src0.ChangeToRegister(Src1.getReg(), false, false, Src1.isKill());
2459  Src0.setSubReg(Src1.getSubReg());
2460  } else
2461  llvm_unreachable("Should only have register or immediate operands");
2462 
2463  Src1.ChangeToRegister(Src0Reg, false, false, Src0Kill);
2464  Src1.setSubReg(Src0SubReg);
2465 }
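// For example, if src1 of a VOP2 instruction is an SGPR, the operands are
// either commuted (when src0 can legally be used as src1) or src1 is copied
// into a VGPR by legalizeOpWithMove().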
2466 
2467 // Legalize VOP3 operands. Because all operand types are supported for any
2468 // operand, and since literal constants are not allowed and should never be
2469 // seen, we only need to worry about inserting copies if we use multiple SGPR
2470 // operands.
2471 void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
2472  MachineInstr &MI) const {
2473  unsigned Opc = MI.getOpcode();
2474 
2475  int VOP3Idx[3] = {
2476  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
2477  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1),
2478  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)
2479  };
2480 
2481  // Find the one SGPR operand we are allowed to use.
2482  unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);
2483 
2484  for (unsigned i = 0; i < 3; ++i) {
2485  int Idx = VOP3Idx[i];
2486  if (Idx == -1)
2487  break;
2488  MachineOperand &MO = MI.getOperand(Idx);
2489 
2490  // We should never see a VOP3 instruction with an illegal immediate operand.
2491  if (!MO.isReg())
2492  continue;
2493 
2494  if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
2495  continue; // VGPRs are legal
2496 
2497  if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
2498  SGPRReg = MO.getReg();
2499  // We can use one SGPR in each VOP3 instruction.
2500  continue;
2501  }
2502 
2503  // If we make it this far, then the operand is not legal and we must
2504  // legalize it.
2505  legalizeOpWithMove(MI, Idx);
2506  }
2507 }
2508 
2509 unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI,
2510  MachineRegisterInfo &MRI) const {
2511  const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg);
2512  const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC);
2513  unsigned DstReg = MRI.createVirtualRegister(SRC);
2514  unsigned SubRegs = VRC->getSize() / 4;
2515 
2517  for (unsigned i = 0; i < SubRegs; ++i) {
2518  unsigned SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
2519  BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
2520  get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
2521  .addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
2522  SRegs.push_back(SGPR);
2523  }
2524 
2525  MachineInstrBuilder MIB =
2526  BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
2527  get(AMDGPU::REG_SEQUENCE), DstReg);
2528  for (unsigned i = 0; i < SubRegs; ++i) {
2529  MIB.addReg(SRegs[i]);
2530  MIB.addImm(RI.getSubRegFromChannel(i));
2531  }
2532  return DstReg;
2533 }
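// For a 64-bit pointer this emits two V_READFIRSTLANE_B32 instructions, one
// per 32-bit sub-register, followed by a REG_SEQUENCE that rebuilds the SGPR
// pair.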
2534 
2535 void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI,
2536  MachineInstr &MI) const {
2537 
2538  // If the pointer is stored in VGPRs, then we need to move it to
2539  // SGPRs using v_readfirstlane. This is safe because we only select
2540  // loads with uniform pointers to SMRD instructions, so we know the
2541  // pointer value is uniform.
2542  MachineOperand *SBase = getNamedOperand(MI, AMDGPU::OpName::sbase);
2543  if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) {
2544  unsigned SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI);
2545  SBase->setReg(SGPR);
2546  }
2547 }
2548 
2549 void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
2550  MachineBasicBlock::iterator I,
2551  const TargetRegisterClass *DstRC,
2552  MachineOperand &Op,
2553  MachineRegisterInfo &MRI,
2554  const DebugLoc &DL) const {
2555 
2556  unsigned OpReg = Op.getReg();
2557  unsigned OpSubReg = Op.getSubReg();
2558 
2559  const TargetRegisterClass *OpRC = RI.getSubClassWithSubReg(
2560  RI.getRegClassForReg(MRI, OpReg), OpSubReg);
2561 
2562  // Check if operand is already the correct register class.
2563  if (DstRC == OpRC)
2564  return;
2565 
2566  unsigned DstReg = MRI.createVirtualRegister(DstRC);
2567  MachineInstr *Copy = BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg)
2568  .addOperand(Op);
2569 
2570  Op.setReg(DstReg);
2571  Op.setSubReg(0);
2572 
2573  MachineInstr *Def = MRI.getVRegDef(OpReg);
2574  if (!Def)
2575  return;
2576 
2577  // Try to eliminate the copy if it is copying an immediate value.
2578  if (Def->isMoveImmediate())
2579  FoldImmediate(*Copy, *Def, OpReg, &MRI);
2580 }
2581 
2582 void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
2583  MachineFunction &MF = *MI.getParent()->getParent();
2584  MachineRegisterInfo &MRI = MF.getRegInfo();
2585 
2586  // Legalize VOP2
2587  if (isVOP2(MI) || isVOPC(MI)) {
2588  legalizeOperandsVOP2(MRI, MI);
2589  return;
2590  }
2591 
2592  // Legalize VOP3
2593  if (isVOP3(MI)) {
2594  legalizeOperandsVOP3(MRI, MI);
2595  return;
2596  }
2597 
2598  // Legalize SMRD
2599  if (isSMRD(MI)) {
2600  legalizeOperandsSMRD(MRI, MI);
2601  return;
2602  }
2603 
2604  // Legalize REG_SEQUENCE and PHI
2605  // The register class of the operands must be the same type as the register
2606  // class of the output.
2607  if (MI.getOpcode() == AMDGPU::PHI) {
2608  const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
2609  for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
2610  if (!MI.getOperand(i).isReg() ||
2611  !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
2612  continue;
2613  const TargetRegisterClass *OpRC =
2614  MRI.getRegClass(MI.getOperand(i).getReg());
2615  if (RI.hasVGPRs(OpRC)) {
2616  VRC = OpRC;
2617  } else {
2618  SRC = OpRC;
2619  }
2620  }
2621 
2622  // If any of the operands are VGPR registers, then they all must be VGPRs,
2623  // otherwise we will create illegal VGPR->SGPR copies when legalizing
2624  // them.
2625  if (VRC || !RI.isSGPRClass(getOpRegClass(MI, 0))) {
2626  if (!VRC) {
2627  assert(SRC);
2628  VRC = RI.getEquivalentVGPRClass(SRC);
2629  }
2630  RC = VRC;
2631  } else {
2632  RC = SRC;
2633  }
2634 
2635  // Update all the operands so they have the same type.
2636  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
2637  MachineOperand &Op = MI.getOperand(I);
2638  if (!Op.isReg())
2639  continue;
2640 
2641  // MI is a PHI instruction.
2642  MachineBasicBlock *InsertBB = MI.getOperand(I + 1).getMBB();
2643  MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator();
2644 
2645  // Avoid creating no-op copies with the same src and dst reg class. These
2646  // confuse some of the machine passes.
2647  legalizeGenericOperand(*InsertBB, Insert, RC, Op, MRI, MI.getDebugLoc());
2648  }
2649  }
2650 
2651  // REG_SEQUENCE doesn't really require operand legalization, but if one has a
2652  // VGPR dest type and SGPR sources, insert copies so all operands are
2653  // VGPRs. This seems to help operand folding / the register coalescer.
2654  if (MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
2655  MachineBasicBlock *MBB = MI.getParent();
2656  const TargetRegisterClass *DstRC = getOpRegClass(MI, 0);
2657  if (RI.hasVGPRs(DstRC)) {
2658  // Update all the operands so they are VGPR register classes. These may
2659  // not be the same register class because REG_SEQUENCE supports mixing
2660  // subregister index types e.g. sub0_sub1 + sub2 + sub3
2661  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
2662  MachineOperand &Op = MI.getOperand(I);
2663  if (!Op.isReg())
2664  continue;
2665 
2666  const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg());
2667  const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(OpRC);
2668  if (VRC == OpRC)
2669  continue;
2670 
2671  legalizeGenericOperand(*MBB, MI, VRC, Op, MRI, MI.getDebugLoc());
2672  Op.setIsKill();
2673  }
2674  }
2675 
2676  return;
2677  }
2678 
2679  // Legalize INSERT_SUBREG
2680  // src0 must have the same register class as dst
2681  if (MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
2682  unsigned Dst = MI.getOperand(0).getReg();
2683  unsigned Src0 = MI.getOperand(1).getReg();
2684  const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
2685  const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
2686  if (DstRC != Src0RC) {
2687  MachineBasicBlock *MBB = MI.getParent();
2688  MachineOperand &Op = MI.getOperand(1);
2689  legalizeGenericOperand(*MBB, MI, DstRC, Op, MRI, MI.getDebugLoc());
2690  }
2691  return;
2692  }
2693 
2694  // Legalize MIMG and MUBUF/MTBUF for shaders.
2695  //
2696  // Shaders only generate MUBUF/MTBUF instructions via intrinsics or via
2697  // scratch memory access. In both cases, the legalization never involves
2698  // conversion to the addr64 form.
2699  if (isMIMG(MI) ||
2700  (AMDGPU::isShader(MF.getFunction()->getCallingConv()) &&
2701  (isMUBUF(MI) || isMTBUF(MI)))) {
2702  MachineOperand *SRsrc = getNamedOperand(MI, AMDGPU::OpName::srsrc);
2703  if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg()))) {
2704  unsigned SGPR = readlaneVGPRToSGPR(SRsrc->getReg(), MI, MRI);
2705  SRsrc->setReg(SGPR);
2706  }
2707 
2708  MachineOperand *SSamp = getNamedOperand(MI, AMDGPU::OpName::ssamp);
2709  if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg()))) {
2710  unsigned SGPR = readlaneVGPRToSGPR(SSamp->getReg(), MI, MRI);
2711  SSamp->setReg(SGPR);
2712  }
2713  return;
2714  }
2715 
2716  // Legalize MUBUF* instructions by converting to addr64 form.
2717  // FIXME: If we start using the non-addr64 instructions for compute, we
2718  // may need to legalize them as above. This especially applies to the
2719  // buffer_load_format_* variants and variants with idxen (or bothen).
2720  int SRsrcIdx =
2721  AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
2722  if (SRsrcIdx != -1) {
2723  // We have an MUBUF instruction
2724  MachineOperand *SRsrc = &MI.getOperand(SRsrcIdx);
2725  unsigned SRsrcRC = get(MI.getOpcode()).OpInfo[SRsrcIdx].RegClass;
2726  if (RI.getCommonSubClass(MRI.getRegClass(SRsrc->getReg()),
2727  RI.getRegClass(SRsrcRC))) {
2728  // The operands are legal.
2729  // FIXME: We may need to legalize operands besides srsrc.
2730  return;
2731  }
2732 
2733  MachineBasicBlock &MBB = *MI.getParent();
2734 
2735  // Extract the ptr from the resource descriptor.
2736  unsigned SRsrcPtr = buildExtractSubReg(MI, MRI, *SRsrc,
2737  &AMDGPU::VReg_128RegClass, AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
2738 
2739  // Create an empty resource descriptor
2740  unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
2741  unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
2742  unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
2743  unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
2744  uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();
2745 
2746  // Zero64 = 0
2747  BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B64), Zero64)
2748  .addImm(0);
2749 
2750  // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
2751  BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B32), SRsrcFormatLo)
2752  .addImm(RsrcDataFormat & 0xFFFFFFFF);
2753 
2754  // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
2755  BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B32), SRsrcFormatHi)
2756  .addImm(RsrcDataFormat >> 32);
2757 
2758  // NewSRsrc = {Zero64, SRsrcFormat}
2759  BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewSRsrc)
2760  .addReg(Zero64)
2761  .addImm(AMDGPU::sub0_sub1)
2762  .addReg(SRsrcFormatLo)
2763  .addImm(AMDGPU::sub2)
2764  .addReg(SRsrcFormatHi)
2765  .addImm(AMDGPU::sub3);
2766 
2767  MachineOperand *VAddr = getNamedOperand(MI, AMDGPU::OpName::vaddr);
2768  unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
2769  if (VAddr) {
2770  // This is already an ADDR64 instruction so we need to add the pointer
2771  // extracted from the resource descriptor to the current value of VAddr.
2772  unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2773  unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2774 
2775  // NewVaddrLo = SRsrcPtr:sub0 + VAddr:sub0
2776  DebugLoc DL = MI.getDebugLoc();
2777  BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo)
2778  .addReg(SRsrcPtr, 0, AMDGPU::sub0)
2779  .addReg(VAddr->getReg(), 0, AMDGPU::sub0);
2780 
2781  // NewVaddrHi = SRsrcPtr:sub1 + VAddr:sub1
2782  BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e32), NewVAddrHi)
2783  .addReg(SRsrcPtr, 0, AMDGPU::sub1)
2784  .addReg(VAddr->getReg(), 0, AMDGPU::sub1);
2785 
2786  // NewVaddr = {NewVaddrHi, NewVaddrLo}
2787  BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
2788  .addReg(NewVAddrLo)
2789  .addImm(AMDGPU::sub0)
2790  .addReg(NewVAddrHi)
2791  .addImm(AMDGPU::sub1);
2792  } else {
2793  // This instruction is the _OFFSET variant, so we need to convert it to
2794  // ADDR64.
2795  assert(MBB.getParent()->getSubtarget<SISubtarget>().getGeneration()
2796  < SISubtarget::VOLCANIC_ISLANDS &&
2797  "FIXME: Need to emit flat atomics here");
2798 
2799  MachineOperand *VData = getNamedOperand(MI, AMDGPU::OpName::vdata);
2800  MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset);
2801  MachineOperand *SOffset = getNamedOperand(MI, AMDGPU::OpName::soffset);
2802  unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI.getOpcode());
2803 
2804  // Atomics with return have an additional tied operand and are
2805  // missing some of the special bits.
2806  MachineOperand *VDataIn = getNamedOperand(MI, AMDGPU::OpName::vdata_in);
2807  MachineInstr *Addr64;
2808 
2809  if (!VDataIn) {
2810  // Regular buffer load / store.
2811  MachineInstrBuilder MIB =
2812  BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode))
2813  .addOperand(*VData)
2814  .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
2815  // This will be replaced later
2816  // with the new value of vaddr.
2817  .addOperand(*SRsrc)
2818  .addOperand(*SOffset)
2819  .addOperand(*Offset);
2820 
2821  // Atomics do not have this operand.
2822  if (const MachineOperand *GLC =
2823  getNamedOperand(MI, AMDGPU::OpName::glc)) {
2824  MIB.addImm(GLC->getImm());
2825  }
2826 
2827  MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc));
2828 
2829  if (const MachineOperand *TFE =
2830  getNamedOperand(MI, AMDGPU::OpName::tfe)) {
2831  MIB.addImm(TFE->getImm());
2832  }
2833 
2834  MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
2835  Addr64 = MIB;
2836  } else {
2837  // Atomics with return.
2838  Addr64 = BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode))
2839  .addOperand(*VData)
2840  .addOperand(*VDataIn)
2841  .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
2842  // This will be replaced later
2843  // with the new value of vaddr.
2844  .addOperand(*SRsrc)
2845  .addOperand(*SOffset)
2846  .addOperand(*Offset)
2847  .addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc))
2848  .setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
2849  }
2850 
2851  MI.removeFromParent();
2852 
2853  // NewVaddr = {NewVaddrHi, NewVaddrLo}
2854  BuildMI(MBB, Addr64, Addr64->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
2855  NewVAddr)
2856  .addReg(SRsrcPtr, 0, AMDGPU::sub0)
2857  .addImm(AMDGPU::sub0)
2858  .addReg(SRsrcPtr, 0, AMDGPU::sub1)
2859  .addImm(AMDGPU::sub1);
2860 
2861  VAddr = getNamedOperand(*Addr64, AMDGPU::OpName::vaddr);
2862  SRsrc = getNamedOperand(*Addr64, AMDGPU::OpName::srsrc);
2863  }
2864 
2865  // Update the instruction to use NewVaddr
2866  VAddr->setReg(NewVAddr);
2867  // Update the instruction to use NewSRsrc
2868  SRsrc->setReg(NewSRsrc);
2869  }
2870 }
2871 
2872 void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
2873  SmallVector<MachineInstr *, 128> Worklist;
2874  Worklist.push_back(&TopInst);
2875 
2876  while (!Worklist.empty()) {
2877  MachineInstr &Inst = *Worklist.pop_back_val();
2878  MachineBasicBlock *MBB = Inst.getParent();
2879  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
2880 
2881  unsigned Opcode = Inst.getOpcode();
2882  unsigned NewOpcode = getVALUOp(Inst);
2883 
2884  // Handle some special cases
2885  switch (Opcode) {
2886  default:
2887  break;
2888  case AMDGPU::S_AND_B64:
2889  splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_AND_B32_e64);
2890  Inst.eraseFromParent();
2891  continue;
2892 
2893  case AMDGPU::S_OR_B64:
2894  splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_OR_B32_e64);
2895  Inst.eraseFromParent();
2896  continue;
2897 
2898  case AMDGPU::S_XOR_B64:
2899  splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_XOR_B32_e64);
2900  Inst.eraseFromParent();
2901  continue;
2902 
2903  case AMDGPU::S_NOT_B64:
2904  splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::V_NOT_B32_e32);
2905  Inst.eraseFromParent();
2906  continue;
2907 
2908  case AMDGPU::S_BCNT1_I32_B64:
2909  splitScalar64BitBCNT(Worklist, Inst);
2910  Inst.eraseFromParent();
2911  continue;
2912 
2913  case AMDGPU::S_BFE_I64: {
2914  splitScalar64BitBFE(Worklist, Inst);
2915  Inst.eraseFromParent();
2916  continue;
2917  }
2918 
2919  case AMDGPU::S_LSHL_B32:
2920  if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
2921  NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
2922  swapOperands(Inst);
2923  }
2924  break;
2925  case AMDGPU::S_ASHR_I32:
2926  if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
2927  NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
2928  swapOperands(Inst);
2929  }
2930  break;
2931  case AMDGPU::S_LSHR_B32:
2932  if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
2933  NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
2934  swapOperands(Inst);
2935  }
2936  break;
2937  case AMDGPU::S_LSHL_B64:
2938  if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
2939  NewOpcode = AMDGPU::V_LSHLREV_B64;
2940  swapOperands(Inst);
2941  }
2942  break;
2943  case AMDGPU::S_ASHR_I64:
2944  if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
2945  NewOpcode = AMDGPU::V_ASHRREV_I64;
2946  swapOperands(Inst);
2947  }
2948  break;
2949  case AMDGPU::S_LSHR_B64:
2950  if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
2951  NewOpcode = AMDGPU::V_LSHRREV_B64;
2952  swapOperands(Inst);
2953  }
2954  break;
2955 
2956  case AMDGPU::S_ABS_I32:
2957  lowerScalarAbs(Worklist, Inst);
2958  Inst.eraseFromParent();
2959  continue;
2960 
2961  case AMDGPU::S_CBRANCH_SCC0:
2962  case AMDGPU::S_CBRANCH_SCC1:
2963  // Clear unused bits of vcc
2964  BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(AMDGPU::S_AND_B64),
2965  AMDGPU::VCC)
2966  .addReg(AMDGPU::EXEC)
2967  .addReg(AMDGPU::VCC);
2968  break;
2969 
2970  case AMDGPU::S_BFE_U64:
2971  case AMDGPU::S_BFM_B64:
2972  llvm_unreachable("Moving this op to VALU not implemented");
2973  }
2974 
2975  if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
2976  // We cannot move this instruction to the VALU, so we should try to
2977  // legalize its operands instead.
2978  legalizeOperands(Inst);
2979  continue;
2980  }
2981 
2982  // Use the new VALU Opcode.
2983  const MCInstrDesc &NewDesc = get(NewOpcode);
2984  Inst.setDesc(NewDesc);
2985 
2986  // Remove any references to SCC. Vector instructions can't read from it, and
2987  // we're just about to add the implicit use / defs of VCC, and we don't want
2988  // both.
2989  for (unsigned i = Inst.getNumOperands() - 1; i > 0; --i) {
2990  MachineOperand &Op = Inst.getOperand(i);
2991  if (Op.isReg() && Op.getReg() == AMDGPU::SCC) {
2992  Inst.RemoveOperand(i);
2993  addSCCDefUsersToVALUWorklist(Inst, Worklist);
2994  }
2995  }
2996 
2997  if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
2998  // We are converting these to a BFE, so we need to add the missing
2999  // operands for the size and offset.
3000  unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
3001  Inst.addOperand(MachineOperand::CreateImm(0));
3002  Inst.addOperand(MachineOperand::CreateImm(Size));
3003 
3004  } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
3005  // The VALU version adds the second operand to the result, so insert an
3006  // extra 0 operand.
3007  Inst.addOperand(MachineOperand::CreateImm(0));
3008  }
3009 
3010  Inst.addImplicitDefUseOperands(*Inst.getParent()->getParent());
3011 
3012  if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
3013  const MachineOperand &OffsetWidthOp = Inst.getOperand(2);
3014  // If we need to move this to VGPRs, we need to unpack the second operand
3015  // back into the 2 separate ones for bit offset and width.
3016  assert(OffsetWidthOp.isImm() &&
3017  "Scalar BFE is only implemented for constant width and offset");
3018  uint32_t Imm = OffsetWidthOp.getImm();
3019 
3020  uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
3021  uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
3022  Inst.RemoveOperand(2); // Remove old immediate.
3023  Inst.addOperand(MachineOperand::CreateImm(Offset));
3024  Inst.addOperand(MachineOperand::CreateImm(BitWidth));
3025  }
3026 
3027  bool HasDst = Inst.getOperand(0).isReg() && Inst.getOperand(0).isDef();
3028  unsigned NewDstReg = AMDGPU::NoRegister;
3029  if (HasDst) {
3030  // Update the destination register class.
3031  const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst);
3032  if (!NewDstRC)
3033  continue;
3034 
3035  unsigned DstReg = Inst.getOperand(0).getReg();
3036  if (Inst.isCopy() &&
3037  TargetRegisterInfo::isVirtualRegister(Inst.getOperand(1).getReg()) &&
3038  NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) {
3039  // Instead of creating a copy where src and dst are the same register
3040  // class, we just replace all uses of dst with src. These kinds of
3041  // copies interfere with the heuristics MachineSink uses to decide
3042  // whether or not to split a critical edge, since the pass assumes
3043  // that copies will end up as machine instructions and not be
3044  // eliminated.
3045  addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
3046  MRI.replaceRegWith(DstReg, Inst.getOperand(1).getReg());
3047  MRI.clearKillFlags(Inst.getOperand(1).getReg());
3048  Inst.getOperand(0).setReg(DstReg);
3049  continue;
3050  }
3051 
3052  NewDstReg = MRI.createVirtualRegister(NewDstRC);
3053  MRI.replaceRegWith(DstReg, NewDstReg);
3054  }
3055 
3056  // Legalize the operands
3057  legalizeOperands(Inst);
3058 
3059  if (HasDst)
3060  addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
3061  }
3062 }
3063 
3064 void SIInstrInfo::lowerScalarAbs(SmallVectorImpl<MachineInstr *> &Worklist,
3065  MachineInstr &Inst) const {
3066  MachineBasicBlock &MBB = *Inst.getParent();
3067  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3068  MachineBasicBlock::iterator MII = Inst;
3069  DebugLoc DL = Inst.getDebugLoc();
3070 
3071  MachineOperand &Dest = Inst.getOperand(0);
3072  MachineOperand &Src = Inst.getOperand(1);
3073  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3074  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3075 
3076  BuildMI(MBB, MII, DL, get(AMDGPU::V_SUB_I32_e32), TmpReg)
3077  .addImm(0)
3078  .addReg(Src.getReg());
3079 
3080  BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg)
3081  .addReg(Src.getReg())
3082  .addReg(TmpReg);
3083 
3084  MRI.replaceRegWith(Dest.getReg(), ResultReg);
3085  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
3086 }
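// i.e. s_abs_i32 is lowered to max(x, 0 - x), built from V_SUB_I32_e32 and
// V_MAX_I32_e64.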
3087 
3088 void SIInstrInfo::splitScalar64BitUnaryOp(
3089  SmallVectorImpl<MachineInstr *> &Worklist, MachineInstr &Inst,
3090  unsigned Opcode) const {
3091  MachineBasicBlock &MBB = *Inst.getParent();
3092  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3093 
3094  MachineOperand &Dest = Inst.getOperand(0);
3095  MachineOperand &Src0 = Inst.getOperand(1);
3096  DebugLoc DL = Inst.getDebugLoc();
3097 
3098  MachineBasicBlock::iterator MII = Inst;
3099 
3100  const MCInstrDesc &InstDesc = get(Opcode);
3101  const TargetRegisterClass *Src0RC = Src0.isReg() ?
3102  MRI.getRegClass(Src0.getReg()) :
3103  &AMDGPU::SGPR_32RegClass;
3104 
3105  const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
3106 
3107  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
3108  AMDGPU::sub0, Src0SubRC);
3109 
3110  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
3111  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
3112  const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
3113 
3114  unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
3115  BuildMI(MBB, MII, DL, InstDesc, DestSub0)
3116  .addOperand(SrcReg0Sub0);
3117 
3118  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
3119  AMDGPU::sub1, Src0SubRC);
3120 
3121  unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
3122  BuildMI(MBB, MII, DL, InstDesc, DestSub1)
3123  .addOperand(SrcReg0Sub1);
3124 
3125  unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
3126  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
3127  .addReg(DestSub0)
3128  .addImm(AMDGPU::sub0)
3129  .addReg(DestSub1)
3130  .addImm(AMDGPU::sub1);
3131 
3132  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
3133 
3134  // We don't need to legalizeOperands here because for a single operand, src0
3135  // will support any kind of input.
3136 
3137  // Move all users of this moved value.
3138  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
3139 }
3140 
3141 void SIInstrInfo::splitScalar64BitBinaryOp(
3142  SmallVectorImpl<MachineInstr *> &Worklist, MachineInstr &Inst,
3143  unsigned Opcode) const {
3144  MachineBasicBlock &MBB = *Inst.getParent();
3145  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3146 
3147  MachineOperand &Dest = Inst.getOperand(0);
3148  MachineOperand &Src0 = Inst.getOperand(1);
3149  MachineOperand &Src1 = Inst.getOperand(2);
3150  DebugLoc DL = Inst.getDebugLoc();
3151 
3152  MachineBasicBlock::iterator MII = Inst;
3153 
3154  const MCInstrDesc &InstDesc = get(Opcode);
3155  const TargetRegisterClass *Src0RC = Src0.isReg() ?
3156  MRI.getRegClass(Src0.getReg()) :
3157  &AMDGPU::SGPR_32RegClass;
3158 
3159  const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
3160  const TargetRegisterClass *Src1RC = Src1.isReg() ?
3161  MRI.getRegClass(Src1.getReg()) :
3162  &AMDGPU::SGPR_32RegClass;
3163 
3164  const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);
3165 
3166  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
3167  AMDGPU::sub0, Src0SubRC);
3168  MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
3169  AMDGPU::sub0, Src1SubRC);
3170 
3171  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
3172  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
3173  const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
3174 
3175  unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
3176  MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0)
3177  .addOperand(SrcReg0Sub0)
3178  .addOperand(SrcReg1Sub0);
3179 
3180  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
3181  AMDGPU::sub1, Src0SubRC);
3182  MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
3183  AMDGPU::sub1, Src1SubRC);
3184 
3185  unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
3186  MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1)
3187  .addOperand(SrcReg0Sub1)
3188  .addOperand(SrcReg1Sub1);
3189 
3190  unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
3191  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
3192  .addReg(DestSub0)
3193  .addImm(AMDGPU::sub0)
3194  .addReg(DestSub1)
3195  .addImm(AMDGPU::sub1);
3196 
3197  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
3198 
3199  // Try to legalize the operands in case we need to swap the order to keep it
3200  // valid.
3201  legalizeOperands(LoHalf);
3202  legalizeOperands(HiHalf);
3203 
3204  // Move all users of this moved value.
3205  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
3206 }
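// For example, S_AND_B64 becomes two V_AND_B32_e64 operations on the sub0 and
// sub1 halves, with a REG_SEQUENCE recombining the 64-bit result.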
3207 
3208 void SIInstrInfo::splitScalar64BitBCNT(
3209  SmallVectorImpl<MachineInstr *> &Worklist, MachineInstr &Inst) const {
3210  MachineBasicBlock &MBB = *Inst.getParent();
3211  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3212 
3213  MachineBasicBlock::iterator MII = Inst;
3214  DebugLoc DL = Inst.getDebugLoc();
3215 
3216  MachineOperand &Dest = Inst.getOperand(0);
3217  MachineOperand &Src = Inst.getOperand(1);
3218 
3219  const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
3220  const TargetRegisterClass *SrcRC = Src.isReg() ?
3221  MRI.getRegClass(Src.getReg()) :
3222  &AMDGPU::SGPR_32RegClass;
3223 
3224  unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3225  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3226 
3227  const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0);
3228 
3229  MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
3230  AMDGPU::sub0, SrcSubRC);
3231  MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
3232  AMDGPU::sub1, SrcSubRC);
3233 
3234  BuildMI(MBB, MII, DL, InstDesc, MidReg)
3235  .addOperand(SrcRegSub0)
3236  .addImm(0);
3237 
3238  BuildMI(MBB, MII, DL, InstDesc, ResultReg)
3239  .addOperand(SrcRegSub1)
3240  .addReg(MidReg);
3241 
3242  MRI.replaceRegWith(Dest.getReg(), ResultReg);
3243 
3244  // We don't need to legalize operands here. src0 for either instruction can be
3245  // an SGPR, and the second input is unused or determined here.
3246  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
3247 }
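// The 64-bit population count is computed as bcnt(hi, bcnt(lo, 0)); the
// second operand of V_BCNT_U32_B32 is added to the count, so the two halves
// accumulate into ResultReg.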
3248 
3249 void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
3250  MachineInstr &Inst) const {
3251  MachineBasicBlock &MBB = *Inst.getParent();
3252  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3253  MachineBasicBlock::iterator MII = Inst;
3254  DebugLoc DL = Inst.getDebugLoc();
3255 
3256  MachineOperand &Dest = Inst.getOperand(0);
3257  uint32_t Imm = Inst.getOperand(2).getImm();
3258  uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
3259  uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
3260 
3261  (void) Offset;
3262 
3263  // Only sext_inreg cases handled.
3264  assert(Inst.getOpcode() == AMDGPU::S_BFE_I64 && BitWidth <= 32 &&
3265  Offset == 0 && "Not implemented");
3266 
3267  if (BitWidth < 32) {
3268  unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3269  unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3270  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
3271 
3272  BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo)
3273  .addReg(Inst.getOperand(1).getReg(), 0, AMDGPU::sub0)
3274  .addImm(0)
3275  .addImm(BitWidth);
3276 
3277  BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi)
3278  .addImm(31)
3279  .addReg(MidRegLo);
3280 
3281  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
3282  .addReg(MidRegLo)
3283  .addImm(AMDGPU::sub0)
3284  .addReg(MidRegHi)
3285  .addImm(AMDGPU::sub1);
3286 
3287  MRI.replaceRegWith(Dest.getReg(), ResultReg);
3288  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
3289  return;
3290  }
3291 
3292  MachineOperand &Src = Inst.getOperand(1);
3293  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3294  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
3295 
3296  BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg)
3297  .addImm(31)
3298  .addReg(Src.getReg(), 0, AMDGPU::sub0);
3299 
3300  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
3301  .addReg(Src.getReg(), 0, AMDGPU::sub0)
3302  .addImm(AMDGPU::sub0)
3303  .addReg(TmpReg)
3304  .addImm(AMDGPU::sub1);
3305 
3306  MRI.replaceRegWith(Dest.getReg(), ResultReg);
3307  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
3308 }
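// For BitWidth < 32 this emits a 32-bit V_BFE_I32 on the low half plus an
// arithmetic shift right by 31 to produce the sign-extended high half; for
// BitWidth == 32 only the high half needs to be generated from the sign bit.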
3309 
3310 void SIInstrInfo::addUsersToMoveToVALUWorklist(
3311  unsigned DstReg,
3312  MachineRegisterInfo &MRI,
3313  SmallVectorImpl<MachineInstr *> &Worklist) const {
3314  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg),
3315  E = MRI.use_end(); I != E;) {
3316  MachineInstr &UseMI = *I->getParent();
3317  if (!canReadVGPR(UseMI, I.getOperandNo())) {
3318  Worklist.push_back(&UseMI);
3319 
3320  do {
3321  ++I;
3322  } while (I != E && I->getParent() == &UseMI);
3323  } else {
3324  ++I;
3325  }
3326  }
3327 }
3328 
3329 void SIInstrInfo::addSCCDefUsersToVALUWorklist(
3330  MachineInstr &SCCDefInst, SmallVectorImpl<MachineInstr *> &Worklist) const {
3331  // This assumes that all the users of SCC are in the same block
3332  // as the SCC def.
3333  for (MachineInstr &MI :
3334  make_range(MachineBasicBlock::iterator(SCCDefInst),
3335  SCCDefInst.getParent()->end())) {
3336  // Exit if we find another SCC def.
3337  if (MI.findRegisterDefOperandIdx(AMDGPU::SCC) != -1)
3338  return;
3339 
3340  if (MI.findRegisterUseOperandIdx(AMDGPU::SCC) != -1)
3341  Worklist.push_back(&MI);
3342  }
3343 }
3344 
3345 const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
3346  const MachineInstr &Inst) const {
3347  const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0);
3348 
3349  switch (Inst.getOpcode()) {
3350  // For target instructions, getOpRegClass just returns the virtual register
3351  // class associated with the operand, so we need to find an equivalent VGPR
3352  // register class in order to move the instruction to the VALU.
3353  case AMDGPU::COPY:
3354  case AMDGPU::PHI:
3355  case AMDGPU::REG_SEQUENCE:
3356  case AMDGPU::INSERT_SUBREG:
3357  if (RI.hasVGPRs(NewDstRC))
3358  return nullptr;
3359 
3360  NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
3361  if (!NewDstRC)
3362  return nullptr;
3363  return NewDstRC;
3364  default:
3365  return NewDstRC;
3366  }
3367 }
3368 
3369 // Find the one SGPR operand we are allowed to use.
3370 unsigned SIInstrInfo::findUsedSGPR(const MachineInstr &MI,
3371  int OpIndices[3]) const {
3372  const MCInstrDesc &Desc = MI.getDesc();
3373 
3374  // Find the one SGPR operand we are allowed to use.
3375  //
3376  // First we need to consider the instruction's operand requirements before
3377  // legalizing. Some operands are required to be SGPRs, such as implicit uses
3378  // of VCC, but we are still bound by the constant bus requirement to only use
3379  // one.
3380  //
3381  // If the operand's class is an SGPR, we can never move it.
3382 
3383  unsigned SGPRReg = findImplicitSGPRRead(MI);
3384  if (SGPRReg != AMDGPU::NoRegister)
3385  return SGPRReg;
3386 
3387  unsigned UsedSGPRs[3] = { AMDGPU::NoRegister };
3388  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
3389 
3390  for (unsigned i = 0; i < 3; ++i) {
3391  int Idx = OpIndices[i];
3392  if (Idx == -1)
3393  break;
3394 
3395  const MachineOperand &MO = MI.getOperand(Idx);
3396  if (!MO.isReg())
3397  continue;
3398 
3399  // Is this operand statically required to be an SGPR based on the operand
3400  // constraints?
3401  const TargetRegisterClass *OpRC = RI.getRegClass(Desc.OpInfo[Idx].RegClass);
3402  bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
3403  if (IsRequiredSGPR)
3404  return MO.getReg();
3405 
3406  // If this could be a VGPR or an SGPR, Check the dynamic register class.
3407  unsigned Reg = MO.getReg();
3408  const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
3409  if (RI.isSGPRClass(RegRC))
3410  UsedSGPRs[i] = Reg;
3411  }
3412 
3413  // We don't have a required SGPR operand, so we have a bit more freedom in
3414  // selecting operands to move.
3415 
3416  // Try to select the most used SGPR. If an SGPR is equal to one of the
3417  // others, we choose that.
3418  //
3419  // e.g.
3420  // V_FMA_F32 v0, s0, s0, s0 -> No moves
3421  // V_FMA_F32 v0, s0, s1, s0 -> Move s1
3422 
3423  // TODO: If some of the operands are 64-bit SGPRs and some 32, we should
3424  // prefer those.
3425 
3426  if (UsedSGPRs[0] != AMDGPU::NoRegister) {
3427  if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
3428  SGPRReg = UsedSGPRs[0];
3429  }
3430 
3431  if (SGPRReg == AMDGPU::NoRegister && UsedSGPRs[1] != AMDGPU::NoRegister) {
3432  if (UsedSGPRs[1] == UsedSGPRs[2])
3433  SGPRReg = UsedSGPRs[1];
3434  }
3435 
3436  return SGPRReg;
3437 }
3438 
3439 MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
3440  unsigned OperandName) const {
3441  int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
3442  if (Idx == -1)
3443  return nullptr;
3444 
3445  return &MI.getOperand(Idx);
3446 }
3447 
3448 uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
3449  uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
3450  if (ST.isAmdHsaOS()) {
3451  RsrcDataFormat |= (1ULL << 56);
3452 
3453  if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
3454  // Set MTYPE = 2
3455  RsrcDataFormat |= (2ULL << 59);
3456  }
3457 
3458  return RsrcDataFormat;
3459 }
3460 
3461 uint64_t SIInstrInfo::getScratchRsrcWords23() const {
3462  uint64_t Rsrc23 = getDefaultRsrcDataFormat() |
3463  AMDGPU::RSRC_TID_ENABLE |
3464  0xffffffff; // Size;
3465 
3466  uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1;
3467 
3468  Rsrc23 |= (EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT) |
3469  // IndexStride = 64
3470  (UINT64_C(3) << AMDGPU::RSRC_INDEX_STRIDE_SHIFT);
3471 
3472  // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
3473  // Clear them unless we want a huge stride.
3474  if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
3475  Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
3476 
3477  return Rsrc23;
3478 }
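// Worked example of the element-size encoding above (arithmetic only): a
// maximum private element size of 16 bytes gives Log2_32(16) - 1 == 3, while
// 4 bytes gives Log2_32(4) - 1 == 1; the index stride field is always written
// as 3, which the comment above equates with a stride of 64.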
3479 
3480 bool SIInstrInfo::isLowLatencyInstruction(const MachineInstr &MI) const {
3481  unsigned Opc = MI.getOpcode();
3482 
3483  return isSMRD(Opc);
3484 }
3485 
3486 bool SIInstrInfo::isHighLatencyInstruction(const MachineInstr &MI) const {
3487  unsigned Opc = MI.getOpcode();
3488 
3489  return isMUBUF(Opc) || isMTBUF(Opc) || isMIMG(Opc);
3490 }
3491 
3492 unsigned SIInstrInfo::isStackAccess(const MachineInstr &MI,
3493  int &FrameIndex) const {
3494  const MachineOperand *Addr = getNamedOperand(MI, AMDGPU::OpName::vaddr);
3495  if (!Addr || !Addr->isFI())
3496  return AMDGPU::NoRegister;
3497 
3498  assert(!MI.memoperands_empty() &&
3499  (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS);
3500 
3501  FrameIndex = Addr->getIndex();
3502  return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
3503 }
3504 
3505 unsigned SIInstrInfo::isSGPRStackAccess(const MachineInstr &MI,
3506  int &FrameIndex) const {
3507  const MachineOperand *Addr = getNamedOperand(MI, AMDGPU::OpName::addr);
3508  assert(Addr && Addr->isFI());
3509  FrameIndex = Addr->getIndex();
3510  return getNamedOperand(MI, AMDGPU::OpName::data)->getReg();
3511 }
3512 
3513 unsigned SIInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
3514  int &FrameIndex) const {
3515 
3516  if (!MI.mayLoad())
3517  return AMDGPU::NoRegister;
3518 
3519  if (isMUBUF(MI) || isVGPRSpill(MI))
3520  return isStackAccess(MI, FrameIndex);
3521 
3522  if (isSGPRSpill(MI))
3523  return isSGPRStackAccess(MI, FrameIndex);
3524 
3525  return AMDGPU::NoRegister;
3526 }
3527 
3528 unsigned SIInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
3529  int &FrameIndex) const {
3530  if (!MI.mayStore())
3531  return AMDGPU::NoRegister;
3532 
3533  if (isMUBUF(MI) || isVGPRSpill(MI))
3534  return isStackAccess(MI, FrameIndex);
3535 
3536  if (isSGPRSpill(MI))
3537  return isSGPRStackAccess(MI, FrameIndex);
3538 
3539  return AMDGPU::NoRegister;
3540 }
3541 
3542 unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
3543  unsigned Opc = MI.getOpcode();
3544  const MCInstrDesc &Desc = getMCOpcodeFromPseudo(Opc);
3545  unsigned DescSize = Desc.getSize();
3546 
3547  // If we have a definitive size, we can use it. Otherwise we need to inspect
3548  // the operands to know the size.
3549  //
3550  // FIXME: Instructions that have a base 32-bit encoding report their size as
3551  // 4, even though they are really 8 bytes if they have a literal operand.
3552  if (DescSize != 0 && DescSize != 4)
3553  return DescSize;
3554 
3555  if (Opc == AMDGPU::WAVE_BARRIER)
3556  return 0;
3557 
3558  // 4-byte instructions may have a 32-bit literal encoded after them. Check
3559  // operands that could ever be literals.
3560  if (isVALU(MI) || isSALU(MI)) {
3561  if (isFixedSize(MI)) {
3562  assert(DescSize == 4);
3563  return DescSize;
3564  }
3565 
3566  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3567  if (Src0Idx == -1)
3568  return 4; // No operands.
3569 
3570  if (isLiteralConstantLike(MI.getOperand(Src0Idx), Desc.OpInfo[Src0Idx]))
3571  return 8;
3572 
3573  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
3574  if (Src1Idx == -1)
3575  return 4;
3576 
3577  if (isLiteralConstantLike(MI.getOperand(Src1Idx), Desc.OpInfo[Src1Idx]))
3578  return 8;
3579 
3580  return 4;
3581  }
3582 
3583  if (DescSize == 4)
3584  return 4;
3585 
3586  switch (Opc) {
3587  case AMDGPU::SI_MASK_BRANCH:
3588  case TargetOpcode::IMPLICIT_DEF:
3589  case TargetOpcode::KILL:
3590  case TargetOpcode::DBG_VALUE:
3591  case TargetOpcode::BUNDLE:
3592  case TargetOpcode::EH_LABEL:
3593  return 0;
3594  case TargetOpcode::INLINEASM: {
3595  const MachineFunction *MF = MI.getParent()->getParent();
3596  const char *AsmStr = MI.getOperand(0).getSymbolName();
3597  return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
3598  }
3599  default:
3600  llvm_unreachable("unable to find instruction size");
3601  }
3602 }
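// Illustrative example of the literal handling above: a plain
//
//   s_mov_b32 s0, 1
//
// uses an inline constant and reports 4 bytes, while
//
//   s_mov_b32 s0, 0x12345678
//
// needs a 32-bit literal after the instruction word and reports 8 bytes.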
3603 
3604 bool SIInstrInfo::mayAccessFlatAddressSpace(const MachineInstr &MI) const {
3605  if (!isFLAT(MI))
3606  return false;
3607 
3608  if (MI.memoperands_empty())
3609  return true;
3610 
3611  for (const MachineMemOperand *MMO : MI.memoperands()) {
3612  if (MMO->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS)
3613  return true;
3614  }
3615  return false;
3616 }
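// Note that this is deliberately conservative: a FLAT instruction with no
// memory operands is reported as possibly touching the flat address space.
// A hedged usage sketch, assuming TII points at the SIInstrInfo instance:
//
//   if (TII->mayAccessFlatAddressSpace(MI))
//     /* the access may not stay within a known private/LDS window */;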
3617 
3618 ArrayRef<std::pair<int, const char *>>
3619 SIInstrInfo::getSerializableTargetIndices() const {
3620  static const std::pair<int, const char *> TargetIndices[] = {
3621  {AMDGPU::TI_CONSTDATA_START, "amdgpu-constdata-start"},
3622  {AMDGPU::TI_SCRATCH_RSRC_DWORD0, "amdgpu-scratch-rsrc-dword0"},
3623  {AMDGPU::TI_SCRATCH_RSRC_DWORD1, "amdgpu-scratch-rsrc-dword1"},
3624  {AMDGPU::TI_SCRATCH_RSRC_DWORD2, "amdgpu-scratch-rsrc-dword2"},
3625  {AMDGPU::TI_SCRATCH_RSRC_DWORD3, "amdgpu-scratch-rsrc-dword3"}};
3626  return makeArrayRef(TargetIndices);
3627 }
3628 
3629 /// This is used by the post-RA scheduler (SchedulePostRAList.cpp). The
3630 /// post-RA version of misched uses CreateTargetMIHazardRecognizer.
3631 ScheduleHazardRecognizer *
3632 SIInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
3633  const ScheduleDAG *DAG) const {
3634  return new GCNHazardRecognizer(DAG->MF);
3635 }
3636 
3637 /// This is the hazard recognizer used at -O0 by the PostRAHazardRecognizer
3638 /// pass.
3639 ScheduleHazardRecognizer *
3640 SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const {
3641  return new GCNHazardRecognizer(MF);
3642 }