1 //===- SIInstrInfo.cpp - SI Instruction Information ----------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief SI Implementation of TargetInstrInfo.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "SIInstrInfo.h"
16 #include "AMDGPU.h"
17 #include "AMDGPUSubtarget.h"
18 #include "GCNHazardRecognizer.h"
19 #include "SIDefines.h"
20 #include "SIMachineFunctionInfo.h"
21 #include "SIRegisterInfo.h"
22 #include "Utils/AMDGPUBaseInfo.h"
23 #include "llvm/ADT/APInt.h"
24 #include "llvm/ADT/ArrayRef.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/StringRef.h"
46 #include "llvm/IR/DebugLoc.h"
47 #include "llvm/IR/DiagnosticInfo.h"
48 #include "llvm/IR/Function.h"
49 #include "llvm/IR/InlineAsm.h"
50 #include "llvm/IR/LLVMContext.h"
51 #include "llvm/MC/MCInstrDesc.h"
52 #include "llvm/Support/Casting.h"
53 #include "llvm/Support/CommandLine.h"
54 #include "llvm/Support/Compiler.h"
58 #include <cassert>
59 #include <cstdint>
60 #include <iterator>
61 #include <utility>
62 
63 using namespace llvm;
64 
65 // Must be at least 4 to be able to branch over minimum unconditional branch
66 // code. This is only for making it possible to write reasonably small tests for
67 // long branches.
68 static cl::opt<unsigned>
69 BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),
70  cl::desc("Restrict range of branch instructions (DEBUG)"));
71 
72 SIInstrInfo::SIInstrInfo(const SISubtarget &ST)
73  : AMDGPUInstrInfo(ST), RI(ST), ST(ST) {}
74 
75 //===----------------------------------------------------------------------===//
76 // TargetInstrInfo callbacks
77 //===----------------------------------------------------------------------===//
78 
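// Return the number of operands of \p Node, not counting any trailing glue
// operands.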
79 static unsigned getNumOperandsNoGlue(SDNode *Node) {
80  unsigned N = Node->getNumOperands();
81  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
82  --N;
83  return N;
84 }
85 
86 static SDValue findChainOperand(SDNode *Load) {
87  SDValue LastOp = Load->getOperand(getNumOperandsNoGlue(Load) - 1);
88  assert(LastOp.getValueType() == MVT::Other && "Chain missing from load node");
89  return LastOp;
90 }
91 
92 /// \brief Returns true if both nodes have the same value for the given
93 /// operand \p Op, or if both nodes do not have this operand.
94 static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) {
95  unsigned Opc0 = N0->getMachineOpcode();
96  unsigned Opc1 = N1->getMachineOpcode();
97 
98  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
99  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);
100 
101  if (Op0Idx == -1 && Op1Idx == -1)
102  return true;
103 
104 
105  if ((Op0Idx == -1 && Op1Idx != -1) ||
106  (Op1Idx == -1 && Op0Idx != -1))
107  return false;
108 
109  // getNamedOperandIdx returns the index for the MachineInstr's operands,
110  // which includes the result as the first operand. We are indexing into the
111  // MachineSDNode's operands, so we need to skip the result operand to get
112  // the real index.
113  --Op0Idx;
114  --Op1Idx;
115 
116  return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx);
117 }
118 
119 bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
120  AliasAnalysis *AA) const {
121  // TODO: The generic check fails for VALU instructions that should be
122  // rematerializable due to implicit reads of exec. We really want all of the
123  // generic logic for this except for the implicit exec read check.
124  switch (MI.getOpcode()) {
125  case AMDGPU::V_MOV_B32_e32:
126  case AMDGPU::V_MOV_B32_e64:
127  case AMDGPU::V_MOV_B64_PSEUDO:
128  return true;
129  default:
130  return false;
131  }
132 }
133 
134 bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
135  int64_t &Offset0,
136  int64_t &Offset1) const {
137  if (!Load0->isMachineOpcode() || !Load1->isMachineOpcode())
138  return false;
139 
140  unsigned Opc0 = Load0->getMachineOpcode();
141  unsigned Opc1 = Load1->getMachineOpcode();
142 
143  // Make sure both are actually loads.
144  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())
145  return false;
146 
147  if (isDS(Opc0) && isDS(Opc1)) {
148 
149  // FIXME: Handle this case:
150  if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1))
151  return false;
152 
153  // Check base reg.
154  if (Load0->getOperand(1) != Load1->getOperand(1))
155  return false;
156 
157  // Check chain.
158  if (findChainOperand(Load0) != findChainOperand(Load1))
159  return false;
160 
161  // Skip read2 / write2 variants for simplicity.
162  // TODO: We should report true if the used offsets are adjacent (excluding
163  // the st64 versions).
164  if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::data1) != -1 ||
165  AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::data1) != -1)
166  return false;
167 
168  Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue();
169  Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue();
170  return true;
171  }
172 
173  if (isSMRD(Opc0) && isSMRD(Opc1)) {
174  // Skip time and cache invalidation instructions.
175  if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::sbase) == -1 ||
176  AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::sbase) == -1)
177  return false;
178 
179  assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1));
180 
181  // Check base reg.
182  if (Load0->getOperand(0) != Load1->getOperand(0))
183  return false;
184 
185  const ConstantSDNode *Load0Offset =
186  dyn_cast<ConstantSDNode>(Load0->getOperand(1));
187  const ConstantSDNode *Load1Offset =
188  dyn_cast<ConstantSDNode>(Load1->getOperand(1));
189 
190  if (!Load0Offset || !Load1Offset)
191  return false;
192 
193  // Check chain.
194  if (findChainOperand(Load0) != findChainOperand(Load1))
195  return false;
196 
197  Offset0 = Load0Offset->getZExtValue();
198  Offset1 = Load1Offset->getZExtValue();
199  return true;
200  }
201 
202  // MUBUF and MTBUF can access the same addresses.
203  if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) {
204 
205  // MUBUF and MTBUF have vaddr at different indices.
206  if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) ||
207  findChainOperand(Load0) != findChainOperand(Load1) ||
208  !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) ||
209  !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc))
210  return false;
211 
212  int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
213  int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
214 
215  if (OffIdx0 == -1 || OffIdx1 == -1)
216  return false;
217 
218  // getNamedOperandIdx returns the index for MachineInstrs. Since they
219  // include the output in the operand list, but SDNodes don't, we need to
220  // subtract the index by one.
221  --OffIdx0;
222  --OffIdx1;
223 
224  SDValue Off0 = Load0->getOperand(OffIdx0);
225  SDValue Off1 = Load1->getOperand(OffIdx1);
226 
227  // The offset might be a FrameIndexSDNode.
228  if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
229  return false;
230 
231  Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue();
232  Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue();
233  return true;
234  }
235 
236  return false;
237 }
238 
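// The ST64 variants of DS read2/write2 scale their two offsets by 64
// elements, so the byte offset computed below must be scaled accordingly.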
239 static bool isStride64(unsigned Opc) {
240  switch (Opc) {
241  case AMDGPU::DS_READ2ST64_B32:
242  case AMDGPU::DS_READ2ST64_B64:
243  case AMDGPU::DS_WRITE2ST64_B32:
244  case AMDGPU::DS_WRITE2ST64_B64:
245  return true;
246  default:
247  return false;
248  }
249 }
250 
251 bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
252  int64_t &Offset,
253  const TargetRegisterInfo *TRI) const {
254  unsigned Opc = LdSt.getOpcode();
255 
256  if (isDS(LdSt)) {
257  const MachineOperand *OffsetImm =
258  getNamedOperand(LdSt, AMDGPU::OpName::offset);
259  if (OffsetImm) {
260  // Normal, single offset LDS instruction.
261  const MachineOperand *AddrReg =
262  getNamedOperand(LdSt, AMDGPU::OpName::addr);
263 
264  BaseReg = AddrReg->getReg();
265  Offset = OffsetImm->getImm();
266  return true;
267  }
268 
269  // The 2 offset instructions use offset0 and offset1 instead. We can treat
270  // these as a load with a single offset if the 2 offsets are consecutive. We
271  // will use this for some partially aligned loads.
272  const MachineOperand *Offset0Imm =
273  getNamedOperand(LdSt, AMDGPU::OpName::offset0);
274  const MachineOperand *Offset1Imm =
275  getNamedOperand(LdSt, AMDGPU::OpName::offset1);
276 
277  uint8_t Offset0 = Offset0Imm->getImm();
278  uint8_t Offset1 = Offset1Imm->getImm();
279 
280  if (Offset1 > Offset0 && Offset1 - Offset0 == 1) {
281  // Each of these offsets is in element sized units, so we need to convert
282  // to bytes of the individual reads.
283 
284  unsigned EltSize;
285  if (LdSt.mayLoad())
286  EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, 0)) / 16;
287  else {
288  assert(LdSt.mayStore());
289  int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
290  EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, Data0Idx)) / 8;
291  }
292 
293  if (isStride64(Opc))
294  EltSize *= 64;
295 
296  const MachineOperand *AddrReg =
297  getNamedOperand(LdSt, AMDGPU::OpName::addr);
298  BaseReg = AddrReg->getReg();
299  Offset = EltSize * Offset0;
300  return true;
301  }
302 
303  return false;
304  }
305 
306  if (isMUBUF(LdSt) || isMTBUF(LdSt)) {
307  const MachineOperand *SOffset = getNamedOperand(LdSt, AMDGPU::OpName::soffset);
308  if (SOffset && SOffset->isReg())
309  return false;
310 
311  const MachineOperand *AddrReg =
312  getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
313  if (!AddrReg)
314  return false;
315 
316  const MachineOperand *OffsetImm =
317  getNamedOperand(LdSt, AMDGPU::OpName::offset);
318  BaseReg = AddrReg->getReg();
319  Offset = OffsetImm->getImm();
320 
321  if (SOffset) // soffset can be an inline immediate.
322  Offset += SOffset->getImm();
323 
324  return true;
325  }
326 
327  if (isSMRD(LdSt)) {
328  const MachineOperand *OffsetImm =
329  getNamedOperand(LdSt, AMDGPU::OpName::offset);
330  if (!OffsetImm)
331  return false;
332 
333  const MachineOperand *SBaseReg =
334  getNamedOperand(LdSt, AMDGPU::OpName::sbase);
335  BaseReg = SBaseReg->getReg();
336  Offset = OffsetImm->getImm();
337  return true;
338  }
339 
340  if (isFLAT(LdSt)) {
341  const MachineOperand *VAddr = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
342  if (VAddr) {
343  // Can't analyze 2 offsets.
344  if (getNamedOperand(LdSt, AMDGPU::OpName::saddr))
345  return false;
346 
347  BaseReg = VAddr->getReg();
348  } else {
349  // scratch instructions have either vaddr or saddr.
350  BaseReg = getNamedOperand(LdSt, AMDGPU::OpName::saddr)->getReg();
351  }
352 
353  Offset = getNamedOperand(LdSt, AMDGPU::OpName::offset)->getImm();
354  return true;
355  }
356 
357  return false;
358 }
359 
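// Return true if two memory instructions are known to use the same base
// pointer: either their base registers match, or both have a single memory
// operand whose underlying IR objects are identical.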
360 static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, unsigned BaseReg1,
361  const MachineInstr &MI2, unsigned BaseReg2) {
362  if (BaseReg1 == BaseReg2)
363  return true;
364 
365  if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand())
366  return false;
367 
368  auto MO1 = *MI1.memoperands_begin();
369  auto MO2 = *MI2.memoperands_begin();
370  if (MO1->getAddrSpace() != MO2->getAddrSpace())
371  return false;
372 
373  auto Base1 = MO1->getValue();
374  auto Base2 = MO2->getValue();
375  if (!Base1 || !Base2)
376  return false;
377  const MachineFunction &MF = *MI1.getParent()->getParent();
378  const DataLayout &DL = MF.getFunction()->getParent()->getDataLayout();
379  Base1 = GetUnderlyingObject(Base1, DL);
380  Base2 = GetUnderlyingObject(Base2, DL);
381 
382  if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))
383  return false;
384 
385  return Base1 == Base2;
386 }
387 
388 bool SIInstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
389  unsigned BaseReg1,
390  MachineInstr &SecondLdSt,
391  unsigned BaseReg2,
392  unsigned NumLoads) const {
393  if (!memOpsHaveSameBasePtr(FirstLdSt, BaseReg1, SecondLdSt, BaseReg2))
394  return false;
395 
396  const MachineOperand *FirstDst = nullptr;
397  const MachineOperand *SecondDst = nullptr;
398 
399  if ((isMUBUF(FirstLdSt) && isMUBUF(SecondLdSt)) ||
400  (isMTBUF(FirstLdSt) && isMTBUF(SecondLdSt)) ||
401  (isFLAT(FirstLdSt) && isFLAT(SecondLdSt))) {
402  const unsigned MaxGlobalLoadCluster = 6;
403  if (NumLoads > MaxGlobalLoadCluster)
404  return false;
405 
406  FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdata);
407  if (!FirstDst)
408  FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdst);
409  SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdata);
410  if (!SecondDst)
411  SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdst);
412  } else if (isSMRD(FirstLdSt) && isSMRD(SecondLdSt)) {
413  FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::sdst);
414  SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::sdst);
415  } else if (isDS(FirstLdSt) && isDS(SecondLdSt)) {
416  FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdst);
417  SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdst);
418  }
419 
420  if (!FirstDst || !SecondDst)
421  return false;
422 
423  // Try to limit clustering based on the total number of bytes loaded
424  // rather than the number of instructions. This is done to help reduce
425  // register pressure. The method used is somewhat inexact, though,
426  // because it assumes that all loads in the cluster will load the
427  // same number of bytes as FirstLdSt.
428 
429  // The unit of this value is bytes.
430  // FIXME: This needs finer tuning.
431  unsigned LoadClusterThreshold = 16;
432 
433  const MachineRegisterInfo &MRI =
434  FirstLdSt.getParent()->getParent()->getRegInfo();
435  const TargetRegisterClass *DstRC = MRI.getRegClass(FirstDst->getReg());
436 
437  return (NumLoads * (RI.getRegSizeInBits(*DstRC) / 8)) <= LoadClusterThreshold;
438 }
439 
440 static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB,
441  MachineBasicBlock::iterator MI,
442  const DebugLoc &DL, unsigned DestReg,
443  unsigned SrcReg, bool KillSrc) {
444  MachineFunction *MF = MBB.getParent();
445  DiagnosticInfoUnsupported IllegalCopy(*MF->getFunction(),
446  "illegal SGPR to VGPR copy",
447  DL, DS_Error);
448  LLVMContext &C = MF->getFunction()->getContext();
449  C.diagnose(IllegalCopy);
450 
451  BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_ILLEGAL_COPY), DestReg)
452  .addReg(SrcReg, getKillRegState(KillSrc));
453 }
454 
455 void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
456  MachineBasicBlock::iterator MI,
457  const DebugLoc &DL, unsigned DestReg,
458  unsigned SrcReg, bool KillSrc) const {
459  const TargetRegisterClass *RC = RI.getPhysRegClass(DestReg);
460 
461  if (RC == &AMDGPU::VGPR_32RegClass) {
462  assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
463  AMDGPU::SReg_32RegClass.contains(SrcReg));
464  BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
465  .addReg(SrcReg, getKillRegState(KillSrc));
466  return;
467  }
468 
469  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
470  RC == &AMDGPU::SReg_32RegClass) {
471  if (SrcReg == AMDGPU::SCC) {
472  BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B32), DestReg)
473  .addImm(-1)
474  .addImm(0);
475  return;
476  }
477 
478  if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
479  reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
480  return;
481  }
482 
483  BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
484  .addReg(SrcReg, getKillRegState(KillSrc));
485  return;
486  }
487 
488  if (RC == &AMDGPU::SReg_64RegClass) {
489  if (DestReg == AMDGPU::VCC) {
490  if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
491  BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
492  .addReg(SrcReg, getKillRegState(KillSrc));
493  } else {
494  // FIXME: Hack until VReg_1 removed.
495  assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
496  BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32))
497  .addImm(0)
498  .addReg(SrcReg, getKillRegState(KillSrc));
499  }
500 
501  return;
502  }
503 
504  if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) {
505  reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
506  return;
507  }
508 
509  BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
510  .addReg(SrcReg, getKillRegState(KillSrc));
511  return;
512  }
513 
514  if (DestReg == AMDGPU::SCC) {
515  assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
516  BuildMI(MBB, MI, DL, get(AMDGPU::S_CMP_LG_U32))
517  .addReg(SrcReg, getKillRegState(KillSrc))
518  .addImm(0);
519  return;
520  }
521 
522  unsigned EltSize = 4;
523  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
524  if (RI.isSGPRClass(RC)) {
525  if (RI.getRegSizeInBits(*RC) > 32) {
526  Opcode = AMDGPU::S_MOV_B64;
527  EltSize = 8;
528  } else {
529  Opcode = AMDGPU::S_MOV_B32;
530  EltSize = 4;
531  }
532 
533  if (!RI.isSGPRClass(RI.getPhysRegClass(SrcReg))) {
534  reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
535  return;
536  }
537  }
538 
539  ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RC, EltSize);
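  // Pick the copy direction so that, when the source and destination register
  // tuples overlap, no source sub-register is clobbered before it is read.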
540  bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
541 
542  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
543  unsigned SubIdx;
544  if (Forward)
545  SubIdx = SubIndices[Idx];
546  else
547  SubIdx = SubIndices[SubIndices.size() - Idx - 1];
548 
549  MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
550  get(Opcode), RI.getSubReg(DestReg, SubIdx));
551 
552  Builder.addReg(RI.getSubReg(SrcReg, SubIdx));
553 
554  if (Idx == 0)
555  Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
556 
557  bool UseKill = KillSrc && Idx == SubIndices.size() - 1;
558  Builder.addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit);
559  }
560 }
561 
562 int SIInstrInfo::commuteOpcode(unsigned Opcode) const {
563  int NewOpc;
564 
565  // Try to map original to commuted opcode
566  NewOpc = AMDGPU::getCommuteRev(Opcode);
567  if (NewOpc != -1)
568  // Check if the commuted (REV) opcode exists on the target.
569  return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;
570 
571  // Try to map commuted to original opcode
572  NewOpc = AMDGPU::getCommuteOrig(Opcode);
573  if (NewOpc != -1)
574  // Check if the original (non-REV) opcode exists on the target.
575  return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;
576 
577  return Opcode;
578 }
579 
580 void SIInstrInfo::materializeImmediate(MachineBasicBlock &MBB,
581  MachineBasicBlock::iterator MI,
582  const DebugLoc &DL, unsigned DestReg,
583  int64_t Value) const {
584  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
585  const TargetRegisterClass *RegClass = MRI.getRegClass(DestReg);
586  if (RegClass == &AMDGPU::SReg_32RegClass ||
587  RegClass == &AMDGPU::SGPR_32RegClass ||
588  RegClass == &AMDGPU::SReg_32_XM0RegClass ||
589  RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) {
590  BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
591  .addImm(Value);
592  return;
593  }
594 
595  if (RegClass == &AMDGPU::SReg_64RegClass ||
596  RegClass == &AMDGPU::SGPR_64RegClass ||
597  RegClass == &AMDGPU::SReg_64_XEXECRegClass) {
598  BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
599  .addImm(Value);
600  return;
601  }
602 
603  if (RegClass == &AMDGPU::VGPR_32RegClass) {
604  BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
605  .addImm(Value);
606  return;
607  }
608  if (RegClass == &AMDGPU::VReg_64RegClass) {
609  BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), DestReg)
610  .addImm(Value);
611  return;
612  }
613 
614  unsigned EltSize = 4;
615  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
616  if (RI.isSGPRClass(RegClass)) {
617  if (RI.getRegSizeInBits(*RegClass) > 32) {
618  Opcode = AMDGPU::S_MOV_B64;
619  EltSize = 8;
620  } else {
621  Opcode = AMDGPU::S_MOV_B32;
622  EltSize = 4;
623  }
624  }
625 
626  ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RegClass, EltSize);
627  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
628  int64_t IdxValue = Idx == 0 ? Value : 0;
629 
630  MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
631  get(Opcode), RI.getSubReg(DestReg, SubIndices[Idx]));
632  Builder.addImm(IdxValue);
633  }
634 }
635 
636 const TargetRegisterClass *
637 SIInstrInfo::getPreferredSelectRegClass(unsigned Size) const {
638  return &AMDGPU::VGPR_32RegClass;
639 }
640 
641 void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
642  MachineBasicBlock::iterator I,
643  const DebugLoc &DL, unsigned DstReg,
644  ArrayRef<MachineOperand> Cond,
645  unsigned TrueReg,
646  unsigned FalseReg) const {
647  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
648  assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
649  "Not a VGPR32 reg");
650 
651  if (Cond.size() == 1) {
652  unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
653  BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
654  .add(Cond[0]);
655  BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
656  .addReg(FalseReg)
657  .addReg(TrueReg)
658  .addReg(SReg);
659  } else if (Cond.size() == 2) {
660  assert(Cond[0].isImm() && "Cond[0] is not an immediate");
661  switch (Cond[0].getImm()) {
662  case SIInstrInfo::SCC_TRUE: {
663  unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
664  BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
665  .addImm(-1)
666  .addImm(0);
667  BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
668  .addReg(FalseReg)
669  .addReg(TrueReg)
670  .addReg(SReg);
671  break;
672  }
673  case SIInstrInfo::SCC_FALSE: {
674  unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
675  BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
676  .addImm(0)
677  .addImm(-1);
678  BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
679  .addReg(FalseReg)
680  .addReg(TrueReg)
681  .addReg(SReg);
682  break;
683  }
684  case SIInstrInfo::VCCNZ: {
685  MachineOperand RegOp = Cond[1];
686  RegOp.setImplicit(false);
687  unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
688  BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
689  .add(RegOp);
690  BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
691  .addReg(FalseReg)
692  .addReg(TrueReg)
693  .addReg(SReg);
694  break;
695  }
696  case SIInstrInfo::VCCZ: {
697  MachineOperand RegOp = Cond[1];
698  RegOp.setImplicit(false);
699  unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
700  BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
701  .add(RegOp);
702  BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
703  .addReg(TrueReg)
704  .addReg(FalseReg)
705  .addReg(SReg);
706  break;
707  }
708  case SIInstrInfo::EXECNZ: {
709  unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
710  unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
711  BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
712  .addImm(0);
713  BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
714  .addImm(-1)
715  .addImm(0);
716  BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
717  .addReg(FalseReg)
718  .addReg(TrueReg)
719  .addReg(SReg);
720  break;
721  }
722  case SIInstrInfo::EXECZ: {
723  unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
724  unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
725  BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
726  .addImm(0);
727  BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
728  .addImm(0)
729  .addImm(-1);
730  BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
731  .addReg(FalseReg)
732  .addReg(TrueReg)
733  .addReg(SReg);
734  llvm_unreachable("Unhandled branch predicate EXECZ");
735  break;
736  }
737  default:
738  llvm_unreachable("invalid branch predicate");
739  }
740  } else {
741  llvm_unreachable("Can only handle Cond size 1 or 2");
742  }
743 }
744 
745 unsigned SIInstrInfo::insertEQ(MachineBasicBlock *MBB,
746  MachineBasicBlock::iterator I,
747  const DebugLoc &DL,
748  unsigned SrcReg, int Value) const {
749  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
750  unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
751  BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_EQ_I32_e64), Reg)
752  .addImm(Value)
753  .addReg(SrcReg);
754 
755  return Reg;
756 }
757 
758 unsigned SIInstrInfo::insertNE(MachineBasicBlock *MBB,
759  MachineBasicBlock::iterator I,
760  const DebugLoc &DL,
761  unsigned SrcReg, int Value) const {
762  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
763  unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
764  BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_NE_I32_e64), Reg)
765  .addImm(Value)
766  .addReg(SrcReg);
767 
768  return Reg;
769 }
770 
771 unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
772 
773  if (RI.getRegSizeInBits(*DstRC) == 32) {
774  return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
775  } else if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC)) {
776  return AMDGPU::S_MOV_B64;
777  } else if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC)) {
778  return AMDGPU::V_MOV_B64_PSEUDO;
779  }
780  return AMDGPU::COPY;
781 }
782 
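// Map a spill size in bytes to the corresponding SGPR spill pseudo opcode.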
783 static unsigned getSGPRSpillSaveOpcode(unsigned Size) {
784  switch (Size) {
785  case 4:
786  return AMDGPU::SI_SPILL_S32_SAVE;
787  case 8:
788  return AMDGPU::SI_SPILL_S64_SAVE;
789  case 16:
790  return AMDGPU::SI_SPILL_S128_SAVE;
791  case 32:
792  return AMDGPU::SI_SPILL_S256_SAVE;
793  case 64:
794  return AMDGPU::SI_SPILL_S512_SAVE;
795  default:
796  llvm_unreachable("unknown register size");
797  }
798 }
799 
800 static unsigned getVGPRSpillSaveOpcode(unsigned Size) {
801  switch (Size) {
802  case 4:
803  return AMDGPU::SI_SPILL_V32_SAVE;
804  case 8:
805  return AMDGPU::SI_SPILL_V64_SAVE;
806  case 12:
807  return AMDGPU::SI_SPILL_V96_SAVE;
808  case 16:
809  return AMDGPU::SI_SPILL_V128_SAVE;
810  case 32:
811  return AMDGPU::SI_SPILL_V256_SAVE;
812  case 64:
813  return AMDGPU::SI_SPILL_V512_SAVE;
814  default:
815  llvm_unreachable("unknown register size");
816  }
817 }
818 
819 void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
820  MachineBasicBlock::iterator MI,
821  unsigned SrcReg, bool isKill,
822  int FrameIndex,
823  const TargetRegisterClass *RC,
824  const TargetRegisterInfo *TRI) const {
825  MachineFunction *MF = MBB.getParent();
826  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
827  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
828  DebugLoc DL = MBB.findDebugLoc(MI);
829 
830  assert(SrcReg != MFI->getStackPtrOffsetReg() &&
831  SrcReg != MFI->getFrameOffsetReg() &&
832  SrcReg != MFI->getScratchWaveOffsetReg());
833 
834  unsigned Size = FrameInfo.getObjectSize(FrameIndex);
835  unsigned Align = FrameInfo.getObjectAlignment(FrameIndex);
836  MachinePointerInfo PtrInfo
837  = MachinePointerInfo::getFixedStack(*MF, FrameIndex);
838  MachineMemOperand *MMO
839  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
840  Size, Align);
841  unsigned SpillSize = TRI->getSpillSize(*RC);
842 
843  if (RI.isSGPRClass(RC)) {
844  MFI->setHasSpilledSGPRs();
845 
846  // We are only allowed to create one new instruction when spilling
847  // registers, so we need to use a pseudo instruction for spilling SGPRs.
848  const MCInstrDesc &OpDesc = get(getSGPRSpillSaveOpcode(SpillSize));
849 
850  // The SGPR spill/restore instructions only work on numbered SGPRs, so we need
851  // to make sure we are using the correct register class.
852  if (TargetRegisterInfo::isVirtualRegister(SrcReg) && SpillSize == 4) {
853  MachineRegisterInfo &MRI = MF->getRegInfo();
854  MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass);
855  }
856 
857  MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc)
858  .addReg(SrcReg, getKillRegState(isKill)) // data
859  .addFrameIndex(FrameIndex) // addr
860  .addMemOperand(MMO)
861  .addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
862  .addReg(MFI->getFrameOffsetReg(), RegState::Implicit);
863  // Add the scratch resource registers as implicit uses because we may end up
864  // needing them, and need to ensure that the reserved registers are
865  // correctly handled.
866 
867  FrameInfo.setStackID(FrameIndex, 1);
868  if (ST.hasScalarStores()) {
869  // m0 is used for offset to scalar stores if used to spill.
870  Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead);
871  }
872 
873  return;
874  }
875 
876  if (!ST.isVGPRSpillingEnabled(*MF->getFunction())) {
877  LLVMContext &Ctx = MF->getFunction()->getContext();
878  Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to"
879  " spill register");
880  BuildMI(MBB, MI, DL, get(AMDGPU::KILL))
881  .addReg(SrcReg);
882 
883  return;
884  }
885 
886  assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");
887 
888  unsigned Opcode = getVGPRSpillSaveOpcode(SpillSize);
889  MFI->setHasSpilledVGPRs();
890  BuildMI(MBB, MI, DL, get(Opcode))
891  .addReg(SrcReg, getKillRegState(isKill)) // data
892  .addFrameIndex(FrameIndex) // addr
893  .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
894  .addReg(MFI->getFrameOffsetReg()) // scratch_offset
895  .addImm(0) // offset
896  .addMemOperand(MMO);
897 }
898 
899 static unsigned getSGPRSpillRestoreOpcode(unsigned Size) {
900  switch (Size) {
901  case 4:
902  return AMDGPU::SI_SPILL_S32_RESTORE;
903  case 8:
904  return AMDGPU::SI_SPILL_S64_RESTORE;
905  case 16:
906  return AMDGPU::SI_SPILL_S128_RESTORE;
907  case 32:
908  return AMDGPU::SI_SPILL_S256_RESTORE;
909  case 64:
910  return AMDGPU::SI_SPILL_S512_RESTORE;
911  default:
912  llvm_unreachable("unknown register size");
913  }
914 }
915 
916 static unsigned getVGPRSpillRestoreOpcode(unsigned Size) {
917  switch (Size) {
918  case 4:
919  return AMDGPU::SI_SPILL_V32_RESTORE;
920  case 8:
921  return AMDGPU::SI_SPILL_V64_RESTORE;
922  case 12:
923  return AMDGPU::SI_SPILL_V96_RESTORE;
924  case 16:
925  return AMDGPU::SI_SPILL_V128_RESTORE;
926  case 32:
927  return AMDGPU::SI_SPILL_V256_RESTORE;
928  case 64:
929  return AMDGPU::SI_SPILL_V512_RESTORE;
930  default:
931  llvm_unreachable("unknown register size");
932  }
933 }
934 
935 void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
936  MachineBasicBlock::iterator MI,
937  unsigned DestReg, int FrameIndex,
938  const TargetRegisterClass *RC,
939  const TargetRegisterInfo *TRI) const {
940  MachineFunction *MF = MBB.getParent();
941  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
942  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
943  DebugLoc DL = MBB.findDebugLoc(MI);
944  unsigned Align = FrameInfo.getObjectAlignment(FrameIndex);
945  unsigned Size = FrameInfo.getObjectSize(FrameIndex);
946  unsigned SpillSize = TRI->getSpillSize(*RC);
947 
948  MachinePointerInfo PtrInfo
949  = MachinePointerInfo::getFixedStack(*MF, FrameIndex);
950 
951  MachineMemOperand *MMO = MF->getMachineMemOperand(
952  PtrInfo, MachineMemOperand::MOLoad, Size, Align);
953 
954  if (RI.isSGPRClass(RC)) {
955  // FIXME: Maybe this should not include a memoperand because it will be
956  // lowered to non-memory instructions.
957  const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(SpillSize));
958  if (TargetRegisterInfo::isVirtualRegister(DestReg) && SpillSize == 4) {
959  MachineRegisterInfo &MRI = MF->getRegInfo();
960  MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass);
961  }
962 
963  FrameInfo.setStackID(FrameIndex, 1);
964  MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc, DestReg)
965  .addFrameIndex(FrameIndex) // addr
966  .addMemOperand(MMO)
967  .addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
968  .addReg(MFI->getFrameOffsetReg(), RegState::Implicit);
969 
970  if (ST.hasScalarStores()) {
971  // m0 is used for offset to scalar stores if used to spill.
972  Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead);
973  }
974 
975  return;
976  }
977 
978  if (!ST.isVGPRSpillingEnabled(*MF->getFunction())) {
979  LLVMContext &Ctx = MF->getFunction()->getContext();
980  Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
981  " restore register");
982  BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg);
983 
984  return;
985  }
986 
987  assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");
988 
989  unsigned Opcode = getVGPRSpillRestoreOpcode(SpillSize);
990  BuildMI(MBB, MI, DL, get(Opcode), DestReg)
991  .addFrameIndex(FrameIndex) // vaddr
992  .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
993  .addReg(MFI->getFrameOffsetReg()) // scratch_offset
994  .addImm(0) // offset
995  .addMemOperand(MMO);
996 }
997 
998 /// \param FrameOffset Offset in bytes of the FrameIndex being spilled
999 unsigned SIInstrInfo::calculateLDSSpillAddress(
1000  MachineBasicBlock &MBB, MachineInstr &MI, RegScavenger *RS, unsigned TmpReg,
1001  unsigned FrameOffset, unsigned Size) const {
1002  MachineFunction *MF = MBB.getParent();
1003  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
1004  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
1005  DebugLoc DL = MBB.findDebugLoc(MI);
1006  unsigned WorkGroupSize = MFI->getMaxFlatWorkGroupSize();
1007  unsigned WavefrontSize = ST.getWavefrontSize();
1008 
1009  unsigned TIDReg = MFI->getTIDReg();
1010  if (!MFI->hasCalculatedTID()) {
1011  MachineBasicBlock &Entry = MBB.getParent()->front();
1012  MachineBasicBlock::iterator Insert = Entry.front();
1013  DebugLoc DL = Insert->getDebugLoc();
1014 
1015  TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass,
1016  *MF);
1017  if (TIDReg == AMDGPU::NoRegister)
1018  return TIDReg;
1019 
1020  if (!AMDGPU::isShader(MF->getFunction()->getCallingConv()) &&
1021  WorkGroupSize > WavefrontSize) {
1022  unsigned TIDIGXReg
1024  unsigned TIDIGYReg
1026  unsigned TIDIGZReg
1028  unsigned InputPtrReg =
1030  for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
1031  if (!Entry.isLiveIn(Reg))
1032  Entry.addLiveIn(Reg);
1033  }
1034 
1035  RS->enterBasicBlock(Entry);
1036  // FIXME: Can we scavenge an SReg_64 and access the subregs?
1037  unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
1038  unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
1039  BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0)
1040  .addReg(InputPtrReg)
1042  BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1)
1043  .addReg(InputPtrReg)
1045 
1046  // NGROUPS.X * NGROUPS.Y
1047  BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1)
1048  .addReg(STmp1)
1049  .addReg(STmp0);
1050  // (NGROUPS.X * NGROUPS.Y) * TIDIG.X
1051  BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg)
1052  .addReg(STmp1)
1053  .addReg(TIDIGXReg);
1054  // NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)
1055  BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg)
1056  .addReg(STmp0)
1057  .addReg(TIDIGYReg)
1058  .addReg(TIDReg);
1059  // (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)) + TIDIG.Z
1060  BuildMI(Entry, Insert, DL, get(AMDGPU::V_ADD_I32_e32), TIDReg)
1061  .addReg(TIDReg)
1062  .addReg(TIDIGZReg);
1063  } else {
1064  // Get the wave id
1065  BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64),
1066  TIDReg)
1067  .addImm(-1)
1068  .addImm(0);
1069 
1070  BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64),
1071  TIDReg)
1072  .addImm(-1)
1073  .addReg(TIDReg);
1074  }
1075 
1076  BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32),
1077  TIDReg)
1078  .addImm(2)
1079  .addReg(TIDReg);
1080  MFI->setTIDReg(TIDReg);
1081  }
1082 
1083  // Add FrameIndex to LDS offset
1084  unsigned LDSOffset = MFI->getLDSSize() + (FrameOffset * WorkGroupSize);
1085  BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), TmpReg)
1086  .addImm(LDSOffset)
1087  .addReg(TIDReg);
1088 
1089  return TmpReg;
1090 }
1091 
1092 void SIInstrInfo::insertWaitStates(MachineBasicBlock &MBB,
1093  MachineBasicBlock::iterator MI,
1094  int Count) const {
1095  DebugLoc DL = MBB.findDebugLoc(MI);
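  // s_nop N waits for N + 1 cycles (N is 0-7), so cover Count wait states in
  // chunks of up to 8.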
1096  while (Count > 0) {
1097  int Arg;
1098  if (Count >= 8)
1099  Arg = 7;
1100  else
1101  Arg = Count - 1;
1102  Count -= 8;
1103  BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP))
1104  .addImm(Arg);
1105  }
1106 }
1107 
1108 void SIInstrInfo::insertNoop(MachineBasicBlock &MBB,
1109  MachineBasicBlock::iterator MI) const {
1110  insertWaitStates(MBB, MI, 1);
1111 }
1112 
1113 void SIInstrInfo::insertReturn(MachineBasicBlock &MBB) const {
1114  auto MF = MBB.getParent();
1115  SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
1116 
1117  assert(Info->isEntryFunction());
1118 
1119  if (MBB.succ_empty()) {
1120  bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end();
1121  if (HasNoTerminator)
1122  BuildMI(MBB, MBB.end(), DebugLoc(),
1123  get(Info->returnsVoid() ? AMDGPU::S_ENDPGM : AMDGPU::SI_RETURN_TO_EPILOG));
1124  }
1125 }
1126 
1127 unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) {
1128  switch (MI.getOpcode()) {
1129  default: return 1; // FIXME: Do wait states equal cycles?
1130 
1131  case AMDGPU::S_NOP:
1132  return MI.getOperand(0).getImm() + 1;
1133  }
1134 }
1135 
1136 bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1137  MachineBasicBlock &MBB = *MI.getParent();
1138  DebugLoc DL = MBB.findDebugLoc(MI);
1139  switch (MI.getOpcode()) {
1140  default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
1141  case AMDGPU::S_MOV_B64_term:
1142  // This is only a terminator to get the correct spill code placement during
1143  // register allocation.
1144  MI.setDesc(get(AMDGPU::S_MOV_B64));
1145  break;
1146 
1147  case AMDGPU::S_XOR_B64_term:
1148  // This is only a terminator to get the correct spill code placement during
1149  // register allocation.
1150  MI.setDesc(get(AMDGPU::S_XOR_B64));
1151  break;
1152 
1153  case AMDGPU::S_ANDN2_B64_term:
1154  // This is only a terminator to get the correct spill code placement during
1155  // register allocation.
1156  MI.setDesc(get(AMDGPU::S_ANDN2_B64));
1157  break;
1158 
1159  case AMDGPU::V_MOV_B64_PSEUDO: {
1160  unsigned Dst = MI.getOperand(0).getReg();
1161  unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
1162  unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
1163 
1164  const MachineOperand &SrcOp = MI.getOperand(1);
1165  // FIXME: Will this work for 64-bit floating point immediates?
1166  assert(!SrcOp.isFPImm());
1167  if (SrcOp.isImm()) {
1168  APInt Imm(64, SrcOp.getImm());
1169  BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
1170  .addImm(Imm.getLoBits(32).getZExtValue())
1171  .addReg(Dst, RegState::Implicit | RegState::Define);
1172  BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
1173  .addImm(Imm.getHiBits(32).getZExtValue())
1174  .addReg(Dst, RegState::Implicit | RegState::Define);
1175  } else {
1176  assert(SrcOp.isReg());
1177  BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
1178  .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0))
1179  .addReg(Dst, RegState::Implicit | RegState::Define);
1180  BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
1181  .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1))
1182  .addReg(Dst, RegState::Implicit | RegState::Define);
1183  }
1184  MI.eraseFromParent();
1185  break;
1186  }
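  // V_SET_INACTIVE writes the source value only to the inactive lanes:
  // invert EXEC, perform the move, then invert EXEC back.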
1187  case AMDGPU::V_SET_INACTIVE_B32: {
1188  BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
1189  .addReg(AMDGPU::EXEC);
1190  BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), MI.getOperand(0).getReg())
1191  .add(MI.getOperand(2));
1192  BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
1193  .addReg(AMDGPU::EXEC);
1194  MI.eraseFromParent();
1195  break;
1196  }
1197  case AMDGPU::V_SET_INACTIVE_B64: {
1198  BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
1199  .addReg(AMDGPU::EXEC);
1200  MachineInstr *Copy = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO),
1201  MI.getOperand(0).getReg())
1202  .add(MI.getOperand(2));
1203  expandPostRAPseudo(*Copy);
1204  BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
1205  .addReg(AMDGPU::EXEC);
1206  MI.eraseFromParent();
1207  break;
1208  }
1209  case AMDGPU::V_MOVRELD_B32_V1:
1210  case AMDGPU::V_MOVRELD_B32_V2:
1211  case AMDGPU::V_MOVRELD_B32_V4:
1212  case AMDGPU::V_MOVRELD_B32_V8:
1213  case AMDGPU::V_MOVRELD_B32_V16: {
1214  const MCInstrDesc &MovRelDesc = get(AMDGPU::V_MOVRELD_B32_e32);
1215  unsigned VecReg = MI.getOperand(0).getReg();
1216  bool IsUndef = MI.getOperand(1).isUndef();
1217  unsigned SubReg = AMDGPU::sub0 + MI.getOperand(3).getImm();
1218  assert(VecReg == MI.getOperand(1).getReg());
1219 
1220  MachineInstr *MovRel =
1221  BuildMI(MBB, MI, DL, MovRelDesc)
1222  .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef)
1223  .add(MI.getOperand(2))
1224  .addReg(VecReg, RegState::ImplicitDefine)
1225  .addReg(VecReg,
1226  RegState::Implicit | (IsUndef ? RegState::Undef : 0));
1227 
1228  const int ImpDefIdx =
1229  MovRelDesc.getNumOperands() + MovRelDesc.getNumImplicitUses();
1230  const int ImpUseIdx = ImpDefIdx + 1;
1231  MovRel->tieOperands(ImpDefIdx, ImpUseIdx);
1232 
1233  MI.eraseFromParent();
1234  break;
1235  }
1236  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
1237  MachineFunction &MF = *MBB.getParent();
1238  unsigned Reg = MI.getOperand(0).getReg();
1239  unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
1240  unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
1241 
1242  // Create a bundle so these instructions won't be re-ordered by the
1243  // post-RA scheduler.
1244  MIBundleBuilder Bundler(MBB, MI);
1245  Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg));
1246 
1247  // Add 32-bit offset from this instruction to the start of the
1248  // constant data.
1249  Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo)
1250  .addReg(RegLo)
1251  .add(MI.getOperand(1)));
1252 
1253  MachineInstrBuilder MIB = BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi)
1254  .addReg(RegHi);
1255  if (MI.getOperand(2).getTargetFlags() == SIInstrInfo::MO_NONE)
1256  MIB.addImm(0);
1257  else
1258  MIB.add(MI.getOperand(2));
1259 
1260  Bundler.append(MIB);
1261  finalizeBundle(MBB, Bundler.begin());
1262 
1263  MI.eraseFromParent();
1264  break;
1265  }
1266  case AMDGPU::EXIT_WWM: {
1267  // This only gets its own opcode so that SIFixWWMLiveness can tell when WWM
1268  // is exited.
1269  MI.setDesc(get(AMDGPU::S_MOV_B64));
1270  break;
1271  }
1272  }
1273  return true;
1274 }
1275 
1276 bool SIInstrInfo::swapSourceModifiers(MachineInstr &MI,
1277  MachineOperand &Src0,
1278  unsigned Src0OpName,
1279  MachineOperand &Src1,
1280  unsigned Src1OpName) const {
1281  MachineOperand *Src0Mods = getNamedOperand(MI, Src0OpName);
1282  if (!Src0Mods)
1283  return false;
1284 
1285  MachineOperand *Src1Mods = getNamedOperand(MI, Src1OpName);
1286  assert(Src1Mods &&
1287  "All commutable instructions have both src0 and src1 modifiers");
1288 
1289  int Src0ModsVal = Src0Mods->getImm();
1290  int Src1ModsVal = Src1Mods->getImm();
1291 
1292  Src1Mods->setImm(Src0ModsVal);
1293  Src0Mods->setImm(Src1ModsVal);
1294  return true;
1295 }
1296 
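// Swap a register operand with an immediate or frame-index operand in place,
// preserving the register flags. Returns nullptr if the non-register operand
// kind is not supported.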
1297 static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI,
1298  MachineOperand &RegOp,
1299  MachineOperand &NonRegOp) {
1300  unsigned Reg = RegOp.getReg();
1301  unsigned SubReg = RegOp.getSubReg();
1302  bool IsKill = RegOp.isKill();
1303  bool IsDead = RegOp.isDead();
1304  bool IsUndef = RegOp.isUndef();
1305  bool IsDebug = RegOp.isDebug();
1306 
1307  if (NonRegOp.isImm())
1308  RegOp.ChangeToImmediate(NonRegOp.getImm());
1309  else if (NonRegOp.isFI())
1310  RegOp.ChangeToFrameIndex(NonRegOp.getIndex());
1311  else
1312  return nullptr;
1313 
1314  NonRegOp.ChangeToRegister(Reg, false, false, IsKill, IsDead, IsUndef, IsDebug);
1315  NonRegOp.setSubReg(SubReg);
1316 
1317  return &MI;
1318 }
1319 
1320 MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
1321  unsigned Src0Idx,
1322  unsigned Src1Idx) const {
1323  assert(!NewMI && "this should never be used");
1324 
1325  unsigned Opc = MI.getOpcode();
1326  int CommutedOpcode = commuteOpcode(Opc);
1327  if (CommutedOpcode == -1)
1328  return nullptr;
1329 
1330  assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
1331  static_cast<int>(Src0Idx) &&
1332  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
1333  static_cast<int>(Src1Idx) &&
1334  "inconsistency with findCommutedOpIndices");
1335 
1336  MachineOperand &Src0 = MI.getOperand(Src0Idx);
1337  MachineOperand &Src1 = MI.getOperand(Src1Idx);
1338 
1339  MachineInstr *CommutedMI = nullptr;
1340  if (Src0.isReg() && Src1.isReg()) {
1341  if (isOperandLegal(MI, Src1Idx, &Src0)) {
1342  // Be sure to copy the source modifiers to the right place.
1343  CommutedMI
1344  = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, Src0Idx, Src1Idx);
1345  }
1346 
1347  } else if (Src0.isReg() && !Src1.isReg()) {
1348  // src0 should always be able to support any operand type, so no need to
1349  // check operand legality.
1350  CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1);
1351  } else if (!Src0.isReg() && Src1.isReg()) {
1352  if (isOperandLegal(MI, Src1Idx, &Src0))
1353  CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0);
1354  } else {
1355  // FIXME: Found two non registers to commute. This does happen.
1356  return nullptr;
1357  }
1358 
1359  if (CommutedMI) {
1360  swapSourceModifiers(MI, Src0, AMDGPU::OpName::src0_modifiers,
1361  Src1, AMDGPU::OpName::src1_modifiers);
1362 
1363  CommutedMI->setDesc(get(CommutedOpcode));
1364  }
1365 
1366  return CommutedMI;
1367 }
1368 
1369 // This needs to be implemented because the source modifiers may be inserted
1370 // between the true commutable operands, and the base
1371 // TargetInstrInfo::commuteInstruction uses it.
1372 bool SIInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx0,
1373  unsigned &SrcOpIdx1) const {
1374  if (!MI.isCommutable())
1375  return false;
1376 
1377  unsigned Opc = MI.getOpcode();
1378  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
1379  if (Src0Idx == -1)
1380  return false;
1381 
1382  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
1383  if (Src1Idx == -1)
1384  return false;
1385 
1386  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
1387 }
1388 
1389 bool SIInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
1390  int64_t BrOffset) const {
1391  // BranchRelaxation should never have to check s_setpc_b64 because its dest
1392  // block is unanalyzable.
1393  assert(BranchOp != AMDGPU::S_SETPC_B64);
1394 
1395  // Convert to dwords.
1396  BrOffset /= 4;
1397 
1398  // The branch instructions do PC += signext(SIMM16 * 4) + 4, so the offset is
1399  // from the next instruction.
1400  BrOffset -= 1;
1401 
1402  return isIntN(BranchOffsetBits, BrOffset);
1403 }
1404 
1405 MachineBasicBlock *SIInstrInfo::getBranchDestBlock(
1406  const MachineInstr &MI) const {
1407  if (MI.getOpcode() == AMDGPU::S_SETPC_B64) {
1408  // This would be a difficult analysis to perform, but can always be legal so
1409  // there's no need to analyze it.
1410  return nullptr;
1411  }
1412 
1413  return MI.getOperand(0).getMBB();
1414 }
1415 
1416 unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
1417  MachineBasicBlock &DestBB,
1418  const DebugLoc &DL,
1419  int64_t BrOffset,
1420  RegScavenger *RS) const {
1421  assert(RS && "RegScavenger required for long branching");
1422  assert(MBB.empty() &&
1423  "new block should be inserted for expanding unconditional branch");
1424  assert(MBB.pred_size() == 1);
1425 
1426  MachineFunction *MF = MBB.getParent();
1427  MachineRegisterInfo &MRI = MF->getRegInfo();
1428 
1429  // FIXME: Virtual register workaround for RegScavenger not working with empty
1430  // blocks.
1431  unsigned PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
1432 
1433  auto I = MBB.end();
1434 
1435  // We need to compute the offset relative to the instruction immediately after
1436  // s_getpc_b64. Insert pc arithmetic code before last terminator.
1437  MachineInstr *GetPC = BuildMI(MBB, I, DL, get(AMDGPU::S_GETPC_B64), PCReg);
1438 
1439  // TODO: Handle > 32-bit block address.
1440  if (BrOffset >= 0) {
1441  BuildMI(MBB, I, DL, get(AMDGPU::S_ADD_U32))
1442  .addReg(PCReg, RegState::Define, AMDGPU::sub0)
1443  .addReg(PCReg, 0, AMDGPU::sub0)
1445  BuildMI(MBB, I, DL, get(AMDGPU::S_ADDC_U32))
1446  .addReg(PCReg, RegState::Define, AMDGPU::sub1)
1447  .addReg(PCReg, 0, AMDGPU::sub1)
1448  .addImm(0);
1449  } else {
1450  // Backwards branch.
1451  BuildMI(MBB, I, DL, get(AMDGPU::S_SUB_U32))
1452  .addReg(PCReg, RegState::Define, AMDGPU::sub0)
1453  .addReg(PCReg, 0, AMDGPU::sub0)
1455  BuildMI(MBB, I, DL, get(AMDGPU::S_SUBB_U32))
1456  .addReg(PCReg, RegState::Define, AMDGPU::sub1)
1457  .addReg(PCReg, 0, AMDGPU::sub1)
1458  .addImm(0);
1459  }
1460 
1461  // Insert the indirect branch after the other terminator.
1462  BuildMI(&MBB, DL, get(AMDGPU::S_SETPC_B64))
1463  .addReg(PCReg);
1464 
1465  // FIXME: If spilling is necessary, this will fail because this scavenger has
1466  // no emergency stack slots. It is non-trivial to spill in this situation,
1467  // because the restore code needs to be specially placed after the
1468  // jump. BranchRelaxation then needs to be made aware of the newly inserted
1469  // block.
1470  //
1471  // If a spill is needed for the pc register pair, we need to insert a spill
1472  // restore block right before the destination block, and insert a short branch
1473  // into the old destination block's fallthrough predecessor.
1474  // e.g.:
1475  //
1476  // s_cbranch_scc0 skip_long_branch:
1477  //
1478  // long_branch_bb:
1479  // spill s[8:9]
1480  // s_getpc_b64 s[8:9]
1481  // s_add_u32 s8, s8, restore_bb
1482  // s_addc_u32 s9, s9, 0
1483  // s_setpc_b64 s[8:9]
1484  //
1485  // skip_long_branch:
1486  // foo;
1487  //
1488  // .....
1489  //
1490  // dest_bb_fallthrough_predecessor:
1491  // bar;
1492  // s_branch dest_bb
1493  //
1494  // restore_bb:
1495  // restore s[8:9]
1496  // fallthrough dest_bb
1497  ///
1498  // dest_bb:
1499  // buzz;
1500 
1501  RS->enterBasicBlockEnd(MBB);
1502  unsigned Scav = RS->scavengeRegister(&AMDGPU::SReg_64RegClass,
1503  MachineBasicBlock::iterator(GetPC), 0);
1504  MRI.replaceRegWith(PCReg, Scav);
1505  MRI.clearVirtRegs();
1506  RS->setRegUsed(Scav);
1507 
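  // Size of the emitted sequence in bytes: s_getpc_b64 (4) + s_add_u32 with a
  // 32-bit literal (8) + s_addc_u32 (4) + s_setpc_b64 (4).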
1508  return 4 + 8 + 4 + 4;
1509 }
1510 
1511 unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
1512  switch (Cond) {
1513  case SIInstrInfo::SCC_TRUE:
1514  return AMDGPU::S_CBRANCH_SCC1;
1515  case SIInstrInfo::SCC_FALSE:
1516  return AMDGPU::S_CBRANCH_SCC0;
1517  case SIInstrInfo::VCCNZ:
1518  return AMDGPU::S_CBRANCH_VCCNZ;
1519  case SIInstrInfo::VCCZ:
1520  return AMDGPU::S_CBRANCH_VCCZ;
1521  case SIInstrInfo::EXECNZ:
1522  return AMDGPU::S_CBRANCH_EXECNZ;
1523  case SIInstrInfo::EXECZ:
1524  return AMDGPU::S_CBRANCH_EXECZ;
1525  default:
1526  llvm_unreachable("invalid branch predicate");
1527  }
1528 }
1529 
1530 SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
1531  switch (Opcode) {
1532  case AMDGPU::S_CBRANCH_SCC0:
1533  return SCC_FALSE;
1534  case AMDGPU::S_CBRANCH_SCC1:
1535  return SCC_TRUE;
1536  case AMDGPU::S_CBRANCH_VCCNZ:
1537  return VCCNZ;
1538  case AMDGPU::S_CBRANCH_VCCZ:
1539  return VCCZ;
1540  case AMDGPU::S_CBRANCH_EXECNZ:
1541  return EXECNZ;
1542  case AMDGPU::S_CBRANCH_EXECZ:
1543  return EXECZ;
1544  default:
1545  return INVALID_BR;
1546  }
1547 }
1548 
1549 bool SIInstrInfo::analyzeBranchImpl(MachineBasicBlock &MBB,
1550  MachineBasicBlock::iterator I,
1551  MachineBasicBlock *&TBB,
1552  MachineBasicBlock *&FBB,
1553  SmallVectorImpl<MachineOperand> &Cond,
1554  bool AllowModify) const {
1555  if (I->getOpcode() == AMDGPU::S_BRANCH) {
1556  // Unconditional Branch
1557  TBB = I->getOperand(0).getMBB();
1558  return false;
1559  }
1560 
1561  MachineBasicBlock *CondBB = nullptr;
1562 
1563  if (I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
1564  CondBB = I->getOperand(1).getMBB();
1565  Cond.push_back(I->getOperand(0));
1566  } else {
1567  BranchPredicate Pred = getBranchPredicate(I->getOpcode());
1568  if (Pred == INVALID_BR)
1569  return true;
1570 
1571  CondBB = I->getOperand(0).getMBB();
1572  Cond.push_back(MachineOperand::CreateImm(Pred));
1573  Cond.push_back(I->getOperand(1)); // Save the branch register.
1574  }
1575  ++I;
1576 
1577  if (I == MBB.end()) {
1578  // Conditional branch followed by fall-through.
1579  TBB = CondBB;
1580  return false;
1581  }
1582 
1583  if (I->getOpcode() == AMDGPU::S_BRANCH) {
1584  TBB = CondBB;
1585  FBB = I->getOperand(0).getMBB();
1586  return false;
1587  }
1588 
1589  return true;
1590 }
1591 
1592 bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
1593  MachineBasicBlock *&FBB,
1594  SmallVectorImpl<MachineOperand> &Cond,
1595  bool AllowModify) const {
1596  MachineBasicBlock::iterator I = MBB.getFirstTerminator();
1597  if (I == MBB.end())
1598  return false;
1599 
1600  if (I->getOpcode() != AMDGPU::SI_MASK_BRANCH)
1601  return analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify);
1602 
1603  ++I;
1604 
1605  // TODO: Should be able to treat as fallthrough?
1606  if (I == MBB.end())
1607  return true;
1608 
1609  if (analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify))
1610  return true;
1611 
1612  MachineBasicBlock *MaskBrDest = I->getOperand(0).getMBB();
1613 
1614  // Specifically handle the case where the conditional branch is to the same
1615  // destination as the mask branch. e.g.
1616  //
1617  // si_mask_branch BB8
1618  // s_cbranch_execz BB8
1619  // s_cbranch BB9
1620  //
1621  // This is required to understand divergent loops which may need the branches
1622  // to be relaxed.
1623  if (TBB != MaskBrDest || Cond.empty())
1624  return true;
1625 
1626  auto Pred = Cond[0].getImm();
1627  return (Pred != EXECZ && Pred != EXECNZ);
1628 }
1629 
1630 unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB,
1631  int *BytesRemoved) const {
1632  MachineBasicBlock::iterator I = MBB.getFirstTerminator();
1633 
1634  unsigned Count = 0;
1635  unsigned RemovedSize = 0;
1636  while (I != MBB.end()) {
1637  MachineBasicBlock::iterator Next = std::next(I);
1638  if (I->getOpcode() == AMDGPU::SI_MASK_BRANCH) {
1639  I = Next;
1640  continue;
1641  }
1642 
1643  RemovedSize += getInstSizeInBytes(*I);
1644  I->eraseFromParent();
1645  ++Count;
1646  I = Next;
1647  }
1648 
1649  if (BytesRemoved)
1650  *BytesRemoved = RemovedSize;
1651 
1652  return Count;
1653 }
1654 
1655 // Copy the flags onto the implicit condition register operand.
1656 static void preserveCondRegFlags(MachineOperand &CondReg,
1657  const MachineOperand &OrigCond) {
1658  CondReg.setIsUndef(OrigCond.isUndef());
1659  CondReg.setIsKill(OrigCond.isKill());
1660 }
1661 
1662 unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB,
1663  MachineBasicBlock *TBB,
1664  MachineBasicBlock *FBB,
1665  ArrayRef<MachineOperand> Cond,
1666  const DebugLoc &DL,
1667  int *BytesAdded) const {
1668  if (!FBB && Cond.empty()) {
1669  BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
1670  .addMBB(TBB);
1671  if (BytesAdded)
1672  *BytesAdded = 4;
1673  return 1;
1674  }
1675 
1676  if (Cond.size() == 1 && Cond[0].isReg()) {
1677  BuildMI(&MBB, DL, get(AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO))
1678  .add(Cond[0])
1679  .addMBB(TBB);
1680  return 1;
1681  }
1682 
1683  assert(TBB && Cond[0].isImm());
1684 
1685  unsigned Opcode
1686  = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));
1687 
1688  if (!FBB) {
1689  Cond[1].isUndef();
1690  MachineInstr *CondBr =
1691  BuildMI(&MBB, DL, get(Opcode))
1692  .addMBB(TBB);
1693 
1694  // Copy the flags onto the implicit condition register operand.
1695  preserveCondRegFlags(CondBr->getOperand(1), Cond[1]);
1696 
1697  if (BytesAdded)
1698  *BytesAdded = 4;
1699  return 1;
1700  }
1701 
1702  assert(TBB && FBB);
1703 
1704  MachineInstr *CondBr =
1705  BuildMI(&MBB, DL, get(Opcode))
1706  .addMBB(TBB);
1707  BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
1708  .addMBB(FBB);
1709 
1710  MachineOperand &CondReg = CondBr->getOperand(1);
1711  CondReg.setIsUndef(Cond[1].isUndef());
1712  CondReg.setIsKill(Cond[1].isKill());
1713 
1714  if (BytesAdded)
1715  *BytesAdded = 8;
1716 
1717  return 2;
1718 }
1719 
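// Opposite branch predicates are encoded as negated immediates, so reversing
// the condition only requires negating Cond[0].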
1720 bool SIInstrInfo::reverseBranchCondition(
1721  SmallVectorImpl<MachineOperand> &Cond) const {
1722  if (Cond.size() != 2) {
1723  return true;
1724  }
1725 
1726  if (Cond[0].isImm()) {
1727  Cond[0].setImm(-Cond[0].getImm());
1728  return false;
1729  }
1730 
1731  return true;
1732 }
1733 
1734 bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
1735  ArrayRef<MachineOperand> Cond,
1736  unsigned TrueReg, unsigned FalseReg,
1737  int &CondCycles,
1738  int &TrueCycles, int &FalseCycles) const {
1739  switch (Cond[0].getImm()) {
1740  case VCCNZ:
1741  case VCCZ: {
1742  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1743  const TargetRegisterClass *RC = MRI.getRegClass(TrueReg);
1744  assert(MRI.getRegClass(FalseReg) == RC);
1745 
1746  int NumInsts = AMDGPU::getRegBitWidth(RC->getID()) / 32;
1747  CondCycles = TrueCycles = FalseCycles = NumInsts; // ???
1748 
1749  // Limit to equal cost for branch vs. N v_cndmask_b32s.
1750  return !RI.isSGPRClass(RC) && NumInsts <= 6;
1751  }
1752  case SCC_TRUE:
1753  case SCC_FALSE: {
1754  // FIXME: We could insert for VGPRs if we could replace the original compare
1755  // with a vector one.
1756  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1757  const TargetRegisterClass *RC = MRI.getRegClass(TrueReg);
1758  assert(MRI.getRegClass(FalseReg) == RC);
1759 
1760  int NumInsts = AMDGPU::getRegBitWidth(RC->getID()) / 32;
1761 
1762  // Multiples of 8 can do s_cselect_b64
1763  if (NumInsts % 2 == 0)
1764  NumInsts /= 2;
1765 
1766  CondCycles = TrueCycles = FalseCycles = NumInsts; // ???
1767  return RI.isSGPRClass(RC);
1768  }
1769  default:
1770  return false;
1771  }
1772 }
1773 
1774 void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
1775  MachineBasicBlock::iterator I, const DebugLoc &DL,
1776  unsigned DstReg, ArrayRef<MachineOperand> Cond,
1777  unsigned TrueReg, unsigned FalseReg) const {
1778  BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
1779  if (Pred == VCCZ || Pred == SCC_FALSE) {
1780  Pred = static_cast<BranchPredicate>(-Pred);
1781  std::swap(TrueReg, FalseReg);
1782  }
1783 
1784  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1785  const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg);
1786  unsigned DstSize = RI.getRegSizeInBits(*DstRC);
1787 
1788  if (DstSize == 32) {
1789  unsigned SelOp = Pred == SCC_TRUE ?
1790  AMDGPU::S_CSELECT_B32 : AMDGPU::V_CNDMASK_B32_e32;
1791 
1792  // Instruction's operands are backwards from what is expected.
1793  MachineInstr *Select =
1794  BuildMI(MBB, I, DL, get(SelOp), DstReg)
1795  .addReg(FalseReg)
1796  .addReg(TrueReg);
1797 
1798  preserveCondRegFlags(Select->getOperand(3), Cond[1]);
1799  return;
1800  }
1801 
1802  if (DstSize == 64 && Pred == SCC_TRUE) {
1803  MachineInstr *Select =
1804  BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), DstReg)
1805  .addReg(FalseReg)
1806  .addReg(TrueReg);
1807 
1808  preserveCondRegFlags(Select->getOperand(3), Cond[1]);
1809  return;
1810  }
1811 
1812  static const int16_t Sub0_15[] = {
1813  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1814  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1815  AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1816  AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1817  };
1818 
1819  static const int16_t Sub0_15_64[] = {
1820  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1821  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1822  AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1823  AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
1824  };
1825 
1826  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
1827  const TargetRegisterClass *EltRC = &AMDGPU::VGPR_32RegClass;
1828  const int16_t *SubIndices = Sub0_15;
1829  int NElts = DstSize / 32;
1830 
1831  // 64-bit select is only available for SALU.
1832  if (Pred == SCC_TRUE) {
1833  SelOp = AMDGPU::S_CSELECT_B64;
1834  EltRC = &AMDGPU::SGPR_64RegClass;
1835  SubIndices = Sub0_15_64;
1836 
1837  assert(NElts % 2 == 0);
1838  NElts /= 2;
1839  }
1840 
1841  MachineInstrBuilder MIB = BuildMI(
1842  MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);
1843 
1844  I = MIB->getIterator();
1845 
1846  SmallVector<unsigned, 8> Regs;
1847  for (int Idx = 0; Idx != NElts; ++Idx) {
1848  unsigned DstElt = MRI.createVirtualRegister(EltRC);
1849  Regs.push_back(DstElt);
1850 
1851  unsigned SubIdx = SubIndices[Idx];
1852 
1853  MachineInstr *Select =
1854  BuildMI(MBB, I, DL, get(SelOp), DstElt)
1855  .addReg(FalseReg, 0, SubIdx)
1856  .addReg(TrueReg, 0, SubIdx);
1857  preserveCondRegFlags(Select->getOperand(3), Cond[1]);
1858 
1859  MIB.addReg(DstElt)
1860  .addImm(SubIdx);
1861  }
1862 }
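// A minimal standalone sketch of the expansion above: a select of an N x 32-bit
// value is emulated by selecting each 32-bit element separately, which is what the
// per-subregister V_CNDMASK_B32 / S_CSELECT loop produces. The helper below is
// hypothetical and uses only <cstdint>.
#include <cstdint>

static void selectWideSketch(bool Cond, const uint32_t *TrueVal,
                             const uint32_t *FalseVal, uint32_t *Result,
                             int NumElts) {
  for (int Idx = 0; Idx != NumElts; ++Idx)
    Result[Idx] = Cond ? TrueVal[Idx] : FalseVal[Idx]; // one cndmask per element
}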
1863 
1864 bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) const {
1865  switch (MI.getOpcode()) {
1866  case AMDGPU::V_MOV_B32_e32:
1867  case AMDGPU::V_MOV_B32_e64:
1868  case AMDGPU::V_MOV_B64_PSEUDO: {
1869  // If there are additional implicit register operands, this may be used for
1870  // register indexing so the source register operand isn't simply copied.
1871  unsigned NumOps = MI.getDesc().getNumOperands() +
1872  MI.getDesc().getNumImplicitUses();
1873 
1874  return MI.getNumOperands() == NumOps;
1875  }
1876  case AMDGPU::S_MOV_B32:
1877  case AMDGPU::S_MOV_B64:
1878  case AMDGPU::COPY:
1879  return true;
1880  default:
1881  return false;
1882  }
1883 }
1884 
1885 unsigned SIInstrInfo::getAddressSpaceForPseudoSourceKind(
1886  PseudoSourceValue::PSVKind Kind) const {
1887  switch(Kind) {
1888  case PseudoSourceValue::Stack:
1889  case PseudoSourceValue::FixedStack:
1890  return AMDGPUASI.PRIVATE_ADDRESS;
1891  case PseudoSourceValue::ConstantPool:
1892  case PseudoSourceValue::GOT:
1893  case PseudoSourceValue::JumpTable:
1894  case PseudoSourceValue::GlobalValueCallEntry:
1895  case PseudoSourceValue::ExternalSymbolCallEntry:
1896  case PseudoSourceValue::TargetCustom:
1897  return AMDGPUASI.CONSTANT_ADDRESS;
1898  }
1899  return AMDGPUASI.FLAT_ADDRESS;
1900 }
1901 
1902 void SIInstrInfo::removeModOperands(MachineInstr &MI) const {
1903  unsigned Opc = MI.getOpcode();
1904  int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc,
1905  AMDGPU::OpName::src0_modifiers);
1906  int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc,
1907  AMDGPU::OpName::src1_modifiers);
1908  int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc,
1909  AMDGPU::OpName::src2_modifiers);
1910 
1911  MI.RemoveOperand(Src2ModIdx);
1912  MI.RemoveOperand(Src1ModIdx);
1913  MI.RemoveOperand(Src0ModIdx);
1914 }
1915 
1916 bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
1917  unsigned Reg, MachineRegisterInfo *MRI) const {
1918  if (!MRI->hasOneNonDBGUse(Reg))
1919  return false;
1920 
1921  unsigned Opc = UseMI.getOpcode();
1922  if (Opc == AMDGPU::COPY) {
1923  bool isVGPRCopy = RI.isVGPR(*MRI, UseMI.getOperand(0).getReg());
1924  switch (DefMI.getOpcode()) {
1925  default:
1926  return false;
1927  case AMDGPU::S_MOV_B64:
1928  // TODO: We could fold 64-bit immediates, but this gets complicated
1929  // when there are sub-registers.
1930  return false;
1931 
1932  case AMDGPU::V_MOV_B32_e32:
1933  case AMDGPU::S_MOV_B32:
1934  break;
1935  }
1936  unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
1937  const MachineOperand *ImmOp = getNamedOperand(DefMI, AMDGPU::OpName::src0);
1938  assert(ImmOp);
1939  // FIXME: We could handle FrameIndex values here.
1940  if (!ImmOp->isImm()) {
1941  return false;
1942  }
1943  UseMI.setDesc(get(NewOpc));
1944  UseMI.getOperand(1).ChangeToImmediate(ImmOp->getImm());
1945  UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent());
1946  return true;
1947  }
1948 
1949  if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64 ||
1950  Opc == AMDGPU::V_MAD_F16 || Opc == AMDGPU::V_MAC_F16_e64) {
1951  // Don't fold if we are using source or output modifiers. The new VOP2
1952  // instructions don't have them.
1953  if (hasAnyModifiersSet(UseMI))
1954  return false;
1955 
1956  const MachineOperand &ImmOp = DefMI.getOperand(1);
1957 
1958  // If this is a free constant, there's no reason to do this.
1959  // TODO: We could fold this here instead of letting SIFoldOperands do it
1960  // later.
1961  MachineOperand *Src0 = getNamedOperand(UseMI, AMDGPU::OpName::src0);
1962 
1963  // Any src operand can be used for the legality check.
1964  if (isInlineConstant(UseMI, *Src0, ImmOp))
1965  return false;
1966 
1967  bool IsF32 = Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64;
1968  MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1);
1969  MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2);
1970 
1971  // Multiplied part is the constant: Use v_madmk_{f16, f32}.
1972  // We should only expect these to be on src0 due to canonicalizations.
1973  if (Src0->isReg() && Src0->getReg() == Reg) {
1974  if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))
1975  return false;
1976 
1977  if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))
1978  return false;
1979 
1980  // We need to swap operands 0 and 1 since madmk constant is at operand 1.
1981 
1982  const int64_t Imm = DefMI.getOperand(1).getImm();
1983 
1984  // FIXME: This would be a lot easier if we could return a new instruction
1985  // instead of having to modify in place.
1986 
1987  // Remove these first since they are at the end.
1988  UseMI.RemoveOperand(
1989  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod));
1990  UseMI.RemoveOperand(
1991  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp));
1992 
1993  unsigned Src1Reg = Src1->getReg();
1994  unsigned Src1SubReg = Src1->getSubReg();
1995  Src0->setReg(Src1Reg);
1996  Src0->setSubReg(Src1SubReg);
1997  Src0->setIsKill(Src1->isKill());
1998 
1999  if (Opc == AMDGPU::V_MAC_F32_e64 ||
2000  Opc == AMDGPU::V_MAC_F16_e64)
2001  UseMI.untieRegOperand(
2002  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
2003 
2004  Src1->ChangeToImmediate(Imm);
2005 
2006  removeModOperands(UseMI);
2007  UseMI.setDesc(get(IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16));
2008 
2009  bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
2010  if (DeleteDef)
2011  DefMI.eraseFromParent();
2012 
2013  return true;
2014  }
2015 
2016  // Added part is the constant: Use v_madak_{f16, f32}.
2017  if (Src2->isReg() && Src2->getReg() == Reg) {
2018  // Not allowed to use constant bus for another operand.
2019  // We can however allow an inline immediate as src0.
2020  if (!Src0->isImm() &&
2021  (Src0->isReg() && RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))
2022  return false;
2023 
2024  if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))
2025  return false;
2026 
2027  const int64_t Imm = DefMI.getOperand(1).getImm();
2028 
2029  // FIXME: This would be a lot easier if we could return a new instruction
2030  // instead of having to modify in place.
2031 
2032  // Remove these first since they are at the end.
2033  UseMI.RemoveOperand(
2034  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod));
2035  UseMI.RemoveOperand(
2036  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp));
2037 
2038  if (Opc == AMDGPU::V_MAC_F32_e64 ||
2039  Opc == AMDGPU::V_MAC_F16_e64)
2040  UseMI.untieRegOperand(
2041  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
2042 
2043  // ChangingToImmediate adds Src2 back to the instruction.
2044  Src2->ChangeToImmediate(Imm);
2045 
2046  // These come before src2.
2047  removeModOperands(UseMI);
2048  UseMI.setDesc(get(IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16));
2049 
2050  bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
2051  if (DeleteDef)
2052  DefMI.eraseFromParent();
2053 
2054  return true;
2055  }
2056  }
2057 
2058  return false;
2059 }
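// The two folded forms produced above, written out arithmetically as a standalone
// sketch (hypothetical helper names): v_madmk keeps the immediate as the multiplied
// operand, while v_madak keeps it as the added operand.
static float madmkSketch(float Src0, float K, float Src2) { return Src0 * K + Src2; }
static float madakSketch(float Src0, float Src1, float K) { return Src0 * Src1 + K; }
// Example: folding a v_mov of 2.0 in as the multiplied constant gives
// madmkSketch(3.0f, 2.0f, 1.0f) == 7.0f; folding it in as the addend gives
// madakSketch(3.0f, 4.0f, 2.0f) == 14.0f.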
2060 
2061 static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
2062  int WidthB, int OffsetB) {
2063  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
2064  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
2065  int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
2066  return LowOffset + LowWidth <= HighOffset;
2067 }
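// Usage sketch for the helper above: two accesses [Offset, Offset + Width) are
// disjoint exactly when the lower one ends at or before the higher one begins.
// The example function is hypothetical and only uses <cassert>.
#include <cassert>

static void offsetsDoNotOverlapExample() {
  // [0, 4) and [4, 8) do not overlap.
  assert(offsetsDoNotOverlap(/*WidthA=*/4, /*OffsetA=*/0, /*WidthB=*/4, /*OffsetB=*/4));
  // [0, 8) and [4, 8) overlap.
  assert(!offsetsDoNotOverlap(/*WidthA=*/8, /*OffsetA=*/0, /*WidthB=*/4, /*OffsetB=*/4));
}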
2068 
2069 bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr &MIa,
2070  MachineInstr &MIb) const {
2071  unsigned BaseReg0, BaseReg1;
2072  int64_t Offset0, Offset1;
2073 
2074  if (getMemOpBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) &&
2075  getMemOpBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) {
2076 
2077  if (!MIa.hasOneMemOperand() || !MIb.hasOneMemOperand()) {
2078  // FIXME: Handle ds_read2 / ds_write2.
2079  return false;
2080  }
2081  unsigned Width0 = (*MIa.memoperands_begin())->getSize();
2082  unsigned Width1 = (*MIb.memoperands_begin())->getSize();
2083  if (BaseReg0 == BaseReg1 &&
2084  offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) {
2085  return true;
2086  }
2087  }
2088 
2089  return false;
2090 }
2091 
2092 bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr &MIa,
2093  MachineInstr &MIb,
2094  AliasAnalysis *AA) const {
2095  assert((MIa.mayLoad() || MIa.mayStore()) &&
2096  "MIa must load from or modify a memory location");
2097  assert((MIb.mayLoad() || MIb.mayStore()) &&
2098  "MIb must load from or modify a memory location");
2099 
2100  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects())
2101  return false;
2102 
2103  // XXX - Can we relax this between address spaces?
2104  if (MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
2105  return false;
2106 
2107  if (AA && MIa.hasOneMemOperand() && MIb.hasOneMemOperand()) {
2108  const MachineMemOperand *MMOa = *MIa.memoperands_begin();
2109  const MachineMemOperand *MMOb = *MIb.memoperands_begin();
2110  if (MMOa->getValue() && MMOb->getValue()) {
2111  MemoryLocation LocA(MMOa->getValue(), MMOa->getSize(), MMOa->getAAInfo());
2112  MemoryLocation LocB(MMOb->getValue(), MMOb->getSize(), MMOb->getAAInfo());
2113  if (!AA->alias(LocA, LocB))
2114  return true;
2115  }
2116  }
2117 
2118  // TODO: Should we check the address space from the MachineMemOperand? That
2119  // would allow us to distinguish objects we know don't alias based on the
2120  // underlying address space, even if it was lowered to a different one,
2121  // e.g. private accesses lowered to use MUBUF instructions on a scratch
2122  // buffer.
2123  if (isDS(MIa)) {
2124  if (isDS(MIb))
2125  return checkInstOffsetsDoNotOverlap(MIa, MIb);
2126 
2127  return !isFLAT(MIb) || isSegmentSpecificFLAT(MIb);
2128  }
2129 
2130  if (isMUBUF(MIa) || isMTBUF(MIa)) {
2131  if (isMUBUF(MIb) || isMTBUF(MIb))
2132  return checkInstOffsetsDoNotOverlap(MIa, MIb);
2133 
2134  return !isFLAT(MIb) && !isSMRD(MIb);
2135  }
2136 
2137  if (isSMRD(MIa)) {
2138  if (isSMRD(MIb))
2139  return checkInstOffsetsDoNotOverlap(MIa, MIb);
2140 
2141  return !isFLAT(MIb) && !isMUBUF(MIa) && !isMTBUF(MIa);
2142  }
2143 
2144  if (isFLAT(MIa)) {
2145  if (isFLAT(MIb))
2146  return checkInstOffsetsDoNotOverlap(MIa, MIb);
2147 
2148  return false;
2149  }
2150 
2151  return false;
2152 }
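// A simplified, order-independent model of the category checks above (the enum and
// function are hypothetical): DS (LDS/GDS), buffer (MUBUF/MTBUF), and scalar (SMRD)
// accesses go to distinct memory pools, so a DS access is trivially disjoint from a
// buffer or SMRD access; buffer and SMRD accesses may both touch the same global
// memory, and a FLAT access may alias anything.
enum class MemAccessKind { DS, Buffer, SMRD, Flat };

static bool kindsTriviallyDisjoint(MemAccessKind A, MemAccessKind B) {
  if (A == MemAccessKind::Flat || B == MemAccessKind::Flat)
    return false; // FLAT can address any segment.
  // Only DS is guaranteed disjoint from the other non-FLAT categories; same-kind
  // accesses fall back to the offset check above.
  return (A == MemAccessKind::DS) != (B == MemAccessKind::DS);
}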
2153 
2154 static int64_t getFoldableImm(const MachineOperand* MO) {
2155  if (!MO->isReg())
2156  return false;
2157  const MachineFunction *MF = MO->getParent()->getParent()->getParent();
2158  const MachineRegisterInfo &MRI = MF->getRegInfo();
2159  auto Def = MRI.getUniqueVRegDef(MO->getReg());
2160  if (Def && Def->getOpcode() == AMDGPU::V_MOV_B32_e32 &&
2161  Def->getOperand(1).isImm())
2162  return Def->getOperand(1).getImm();
2163  return AMDGPU::NoRegister;
2164 }
2165 
2166 MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
2167  MachineInstr &MI,
2168  LiveVariables *LV) const {
2169  bool IsF16 = false;
2170 
2171  switch (MI.getOpcode()) {
2172  default:
2173  return nullptr;
2174  case AMDGPU::V_MAC_F16_e64:
2175  IsF16 = true;
2176  LLVM_FALLTHROUGH;
2177  case AMDGPU::V_MAC_F32_e64:
2178  break;
2179  case AMDGPU::V_MAC_F16_e32:
2180  IsF16 = true;
2181  LLVM_FALLTHROUGH;
2182  case AMDGPU::V_MAC_F32_e32: {
2183  int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
2184  AMDGPU::OpName::src0);
2185  const MachineOperand *Src0 = &MI.getOperand(Src0Idx);
2186  if (!Src0->isReg() && !Src0->isImm())
2187  return nullptr;
2188 
2189  if (Src0->isImm() && !isInlineConstant(MI, Src0Idx, *Src0))
2190  return nullptr;
2191 
2192  break;
2193  }
2194  }
2195 
2196  const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst);
2197  const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0);
2198  const MachineOperand *Src0Mods =
2199  getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
2200  const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
2201  const MachineOperand *Src1Mods =
2202  getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
2203  const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
2204  const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
2205  const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
2206 
2207  if (!Src0Mods && !Src1Mods && !Clamp && !Omod &&
2208  // If we have an SGPR input, we will violate the constant bus restriction.
2209  (!Src0->isReg() || !RI.isSGPRReg(MBB->getParent()->getRegInfo(), Src0->getReg()))) {
2210  if (auto Imm = getFoldableImm(Src2)) {
2211  return BuildMI(*MBB, MI, MI.getDebugLoc(),
2212  get(IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32))
2213  .add(*Dst)
2214  .add(*Src0)
2215  .add(*Src1)
2216  .addImm(Imm);
2217  }
2218  if (auto Imm = getFoldableImm(Src1)) {
2219  return BuildMI(*MBB, MI, MI.getDebugLoc(),
2220  get(IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32))
2221  .add(*Dst)
2222  .add(*Src0)
2223  .addImm(Imm)
2224  .add(*Src2);
2225  }
2226  if (auto Imm = getFoldableImm(Src0)) {
2227  if (isOperandLegal(MI, AMDGPU::getNamedOperandIdx(AMDGPU::V_MADMK_F32,
2228  AMDGPU::OpName::src0), Src1))
2229  return BuildMI(*MBB, MI, MI.getDebugLoc(),
2230  get(IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32))
2231  .add(*Dst)
2232  .add(*Src1)
2233  .addImm(Imm)
2234  .add(*Src2);
2235  }
2236  }
2237 
2238  return BuildMI(*MBB, MI, MI.getDebugLoc(),
2239  get(IsF16 ? AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32))
2240  .add(*Dst)
2241  .addImm(Src0Mods ? Src0Mods->getImm() : 0)
2242  .add(*Src0)
2243  .addImm(Src1Mods ? Src1Mods->getImm() : 0)
2244  .add(*Src1)
2245  .addImm(0) // Src mods
2246  .add(*Src2)
2247  .addImm(Clamp ? Clamp->getImm() : 0)
2248  .addImm(Omod ? Omod->getImm() : 0);
2249 }
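// Standalone sketch of the opcode choice above (hypothetical names): when the
// modifier and constant-bus checks above pass, a v_mac is rewritten to v_madak if
// the addend (src2) folds to an immediate, to v_madmk if a multiplicand folds
// (commuting src0/src1 when needed), and to a full v_mad otherwise.
enum class MadFormSketch { MAD, MADMK, MADAK };

static MadFormSketch chooseMadForm(bool Src0FoldsImm, bool Src1FoldsImm,
                                   bool Src2FoldsImm) {
  if (Src2FoldsImm)
    return MadFormSketch::MADAK; // dst = src0 * src1 + K
  if (Src1FoldsImm || Src0FoldsImm)
    return MadFormSketch::MADMK; // dst = src0 * K + src2
  return MadFormSketch::MAD;     // dst = src0 * src1 + src2, with modifiers
}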
2250 
2251 // It's not generally safe to move VALU instructions across these since it will
2252 // start using the register as a base index rather than directly.
2253 // XXX - Why isn't hasSideEffects sufficient for these?
2254 static bool changesVGPRIndexingMode(const MachineInstr &MI) {
2255  switch (MI.getOpcode()) {
2256  case AMDGPU::S_SET_GPR_IDX_ON:
2257  case AMDGPU::S_SET_GPR_IDX_MODE:
2258  case AMDGPU::S_SET_GPR_IDX_OFF:
2259  return true;
2260  default:
2261  return false;
2262  }
2263 }
2264 
2265 bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
2266  const MachineBasicBlock *MBB,
2267  const MachineFunction &MF) const {
2268  // XXX - Do we want the SP check in the base implementation?
2269 
2270  // Target-independent instructions do not have an implicit-use of EXEC, even
2271  // when they operate on VGPRs. Treating EXEC modifications as scheduling
2272  // boundaries prevents incorrect movements of such instructions.
2273  return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF) ||
2274  MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
2275  MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
2276  MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
2277  changesVGPRIndexingMode(MI);
2278 }
2279 
2280 bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
2281  switch (Imm.getBitWidth()) {
2282  case 32:
2283  return AMDGPU::isInlinableLiteral32(Imm.getSExtValue(),
2284  ST.hasInv2PiInlineImm());
2285  case 64:
2286  return AMDGPU::isInlinableLiteral64(Imm.getSExtValue(),
2287  ST.hasInv2PiInlineImm());
2288  case 16:
2289  return ST.has16BitInsts() &&
2290  AMDGPU::isInlinableLiteral16(Imm.getSExtValue(),
2291  ST.hasInv2PiInlineImm());
2292  default:
2293  llvm_unreachable("invalid bitwidth");
2294  }
2295 }
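// Standalone sketch of the 32-bit inline-constant test delegated to
// AMDGPU::isInlinableLiteral32 above: small integers in [-16, 64] plus a handful of
// common float bit patterns qualify, and 1/(2*pi) only when the subtarget supports
// it. This mirrors the documented VALU inline-constant set; the helper name and
// shape are illustrative only.
#include <cstdint>

static bool isInlinableLiteral32Sketch(int32_t Imm, bool HasInv2Pi) {
  if (Imm >= -16 && Imm <= 64)
    return true;
  switch (static_cast<uint32_t>(Imm)) {
  case 0x3f000000: // 0.5
  case 0xbf000000: // -0.5
  case 0x3f800000: // 1.0
  case 0xbf800000: // -1.0
  case 0x40000000: // 2.0
  case 0xc0000000: // -2.0
  case 0x40800000: // 4.0
  case 0xc0800000: // -4.0
    return true;
  case 0x3e22f983: // 1/(2*pi)
    return HasInv2Pi;
  default:
    return false;
  }
}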
2296 
2297 bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
2298  uint8_t OperandType) const {
2299  if (!MO.isImm() ||
2300  OperandType < AMDGPU::OPERAND_SRC_FIRST ||
2301  OperandType > AMDGPU::OPERAND_SRC_LAST)
2302  return false;
2303 
2304  // MachineOperand provides no way to tell the true operand size, since it only
2305  // records a 64-bit value. We need to know the size to determine if a 32-bit
2306  // floating point immediate bit pattern is legal for an integer immediate. It
2307  // would be for any 32-bit integer operand, but would not be for a 64-bit one.
2308 
2309  int64_t Imm = MO.getImm();
2310  switch (OperandType) {
2311  case AMDGPU::OPERAND_REG_IMM_INT32:
2312  case AMDGPU::OPERAND_REG_IMM_FP32:
2313  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2314  case AMDGPU::OPERAND_REG_INLINE_C_FP32: {
2315  int32_t Trunc = static_cast<int32_t>(Imm);
2316  return Trunc == Imm &&
2317  AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm());
2318  }
2319  case AMDGPU::OPERAND_REG_IMM_INT64:
2320  case AMDGPU::OPERAND_REG_IMM_FP64:
2321  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2322  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2323  return AMDGPU::isInlinableLiteral64(MO.getImm(),
2324  ST.hasInv2PiInlineImm());
2325  case AMDGPU::OPERAND_REG_IMM_INT16:
2326  case AMDGPU::OPERAND_REG_IMM_FP16:
2327  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2328  case AMDGPU::OPERAND_REG_INLINE_C_FP16: {
2329  if (isInt<16>(Imm) || isUInt<16>(Imm)) {
2330  // A few special case instructions have 16-bit operands on subtargets
2331  // where 16-bit instructions are not legal.
2332  // TODO: Do the 32-bit immediates work? We shouldn't really need to handle
2333  // constants in these cases
2334  int16_t Trunc = static_cast<int16_t>(Imm);
2335  return ST.has16BitInsts() &&
2336  AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
2337  }
2338 
2339  return false;
2340  }
2341  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2342  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
2343  uint32_t Trunc = static_cast<uint32_t>(Imm);
2344  return AMDGPU::isInlinableLiteralV216(Trunc, ST.has16BitInsts());
2345  }
2346  default:
2347  llvm_unreachable("invalid bitwidth");
2348  }
2349 }
2350 
2351 bool SIInstrInfo::isLiteralConstantLike(const MachineOperand &MO,
2352  const MCOperandInfo &OpInfo) const {
2353  switch (MO.getType()) {
2354  case MachineOperand::MO_Register:
2355  return false;
2356  case MachineOperand::MO_Immediate:
2357  return !isInlineConstant(MO, OpInfo);
2358  case MachineOperand::MO_FrameIndex:
2359  case MachineOperand::MO_MachineBasicBlock:
2360  case MachineOperand::MO_ExternalSymbol:
2361  case MachineOperand::MO_GlobalAddress:
2362  case MachineOperand::MO_MCSymbol:
2363  return true;
2364  default:
2365  llvm_unreachable("unexpected operand type");
2366  }
2367 }
2368 
2369 static bool compareMachineOp(const MachineOperand &Op0,
2370  const MachineOperand &Op1) {
2371  if (Op0.getType() != Op1.getType())
2372  return false;
2373 
2374  switch (Op0.getType()) {
2375  case MachineOperand::MO_Register:
2376  return Op0.getReg() == Op1.getReg();
2377  case MachineOperand::MO_Immediate:
2378  return Op0.getImm() == Op1.getImm();
2379  default:
2380  llvm_unreachable("Didn't expect to be comparing these operand types");
2381  }
2382 }
2383 
2384 bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
2385  const MachineOperand &MO) const {
2386  const MCOperandInfo &OpInfo = get(MI.getOpcode()).OpInfo[OpNo];
2387 
2388  assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
2389 
2390  if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE)
2391  return true;
2392 
2393  if (OpInfo.RegClass < 0)
2394  return false;
2395 
2396  if (MO.isImm() && isInlineConstant(MO, OpInfo))
2397  return RI.opCanUseInlineConstant(OpInfo.OperandType);
2398 
2399  return RI.opCanUseLiteralConstant(OpInfo.OperandType);
2400 }
2401 
2402 bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
2403  int Op32 = AMDGPU::getVOPe32(Opcode);
2404  if (Op32 == -1)
2405  return false;
2406 
2407  return pseudoToMCOpcode(Op32) != -1;
2408 }
2409 
2410 bool SIInstrInfo::hasModifiers(unsigned Opcode) const {
2411  // The src0_modifier operand is present on all instructions
2412  // that have modifiers.
2413 
2414  return AMDGPU::getNamedOperandIdx(Opcode,
2415  AMDGPU::OpName::src0_modifiers) != -1;
2416 }
2417 
2418 bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI,
2419  unsigned OpName) const {
2420  const MachineOperand *Mods = getNamedOperand(MI, OpName);
2421  return Mods && Mods->getImm();
2422 }
2423 
2424 bool SIInstrInfo::hasAnyModifiersSet(const MachineInstr &MI) const {
2425  return hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
2426  hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
2427  hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers) ||
2428  hasModifiersSet(MI, AMDGPU::OpName::clamp) ||
2429  hasModifiersSet(MI, AMDGPU::OpName::omod);
2430 }
2431 
2432 bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
2433  const MachineOperand &MO,
2434  const MCOperandInfo &OpInfo) const {
2435  // Literal constants use the constant bus.
2436  //if (isLiteralConstantLike(MO, OpInfo))
2437  // return true;
2438  if (MO.isImm())
2439  return !isInlineConstant(MO, OpInfo);
2440 
2441  if (!MO.isReg())
2442  return true; // Misc other operands like FrameIndex
2443 
2444  if (!MO.isUse())
2445  return false;
2446 
2447  if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
2448  return RI.isSGPRClass(MRI.getRegClass(MO.getReg()));
2449 
2450  // FLAT_SCR is just an SGPR pair.
2451  if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR))
2452  return true;
2453 
2454  // EXEC register uses the constant bus.
2455  if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
2456  return true;
2457 
2458  // SGPRs use the constant bus
2459  return (MO.getReg() == AMDGPU::VCC || MO.getReg() == AMDGPU::M0 ||
2460  (!MO.isImplicit() &&
2461  (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
2462  AMDGPU::SGPR_64RegClass.contains(MO.getReg()))));
2463 }
2464 
2465 static unsigned findImplicitSGPRRead(const MachineInstr &MI) {
2466  for (const MachineOperand &MO : MI.implicit_operands()) {
2467  // We only care about reads.
2468  if (MO.isDef())
2469  continue;
2470 
2471  switch (MO.getReg()) {
2472  case AMDGPU::VCC:
2473  case AMDGPU::M0:
2474  case AMDGPU::FLAT_SCR:
2475  return MO.getReg();
2476 
2477  default:
2478  break;
2479  }
2480  }
2481 
2482  return AMDGPU::NoRegister;
2483 }
2484 
2485 static bool shouldReadExec(const MachineInstr &MI) {
2486  if (SIInstrInfo::isVALU(MI)) {
2487  switch (MI.getOpcode()) {
2488  case AMDGPU::V_READLANE_B32:
2489  case AMDGPU::V_READLANE_B32_si:
2490  case AMDGPU::V_READLANE_B32_vi:
2491  case AMDGPU::V_WRITELANE_B32:
2492  case AMDGPU::V_WRITELANE_B32_si:
2493  case AMDGPU::V_WRITELANE_B32_vi:
2494  return false;
2495  }
2496 
2497  return true;
2498  }
2499 
2500  if (SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||
2501  SIInstrInfo::isSALU(MI) ||
2502  SIInstrInfo::isSMRD(MI))
2503  return false;
2504 
2505  return true;
2506 }
2507 
2508 static bool isSubRegOf(const SIRegisterInfo &TRI,
2509  const MachineOperand &SuperVec,
2510  const MachineOperand &SubReg) {
2511  if (TargetRegisterInfo::isPhysicalRegister(SubReg.getReg()))
2512  return TRI.isSubRegister(SuperVec.getReg(), SubReg.getReg());
2513 
2514  return SubReg.getSubReg() != AMDGPU::NoSubRegister &&
2515  SubReg.getReg() == SuperVec.getReg();
2516 }
2517 
2518 bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
2519  StringRef &ErrInfo) const {
2520  uint16_t Opcode = MI.getOpcode();
2521  if (SIInstrInfo::isGenericOpcode(MI.getOpcode()))
2522  return true;
2523 
2524  const MachineFunction *MF = MI.getParent()->getParent();
2525  const MachineRegisterInfo &MRI = MF->getRegInfo();
2526 
2527  int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2528  int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2529  int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2530 
2531  // Make sure the number of operands is correct.
2532  const MCInstrDesc &Desc = get(Opcode);
2533  if (!Desc.isVariadic() &&
2534  Desc.getNumOperands() != MI.getNumExplicitOperands()) {
2535  ErrInfo = "Instruction has wrong number of operands.";
2536  return false;
2537  }
2538 
2539  if (MI.isInlineAsm()) {
2540  // Verify register classes for inlineasm constraints.
2541  for (unsigned I = InlineAsm::MIOp_FirstOperand, E = MI.getNumOperands();
2542  I != E; ++I) {
2543  const TargetRegisterClass *RC = MI.getRegClassConstraint(I, this, &RI);
2544  if (!RC)
2545  continue;
2546 
2547  const MachineOperand &Op = MI.getOperand(I);
2548  if (!Op.isReg())
2549  continue;
2550 
2551  unsigned Reg = Op.getReg();
2552  if (!TargetRegisterInfo::isVirtualRegister(Reg) && !RC->contains(Reg)) {
2553  ErrInfo = "inlineasm operand has incorrect register class.";
2554  return false;
2555  }
2556  }
2557 
2558  return true;
2559  }
2560 
2561  // Make sure the register classes are correct.
2562  for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
2563  if (MI.getOperand(i).isFPImm()) {
2564  ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
2565  "all fp values to integers.";
2566  return false;
2567  }
2568 
2569  int RegClass = Desc.OpInfo[i].RegClass;
2570 
2571  switch (Desc.OpInfo[i].OperandType) {
2572  case MCOI::OPERAND_REGISTER:
2573  if (MI.getOperand(i).isImm()) {
2574  ErrInfo = "Illegal immediate value for operand.";
2575  return false;
2576  }
2577  break;
2578  case AMDGPU::OPERAND_REG_IMM_INT32:
2579  case AMDGPU::OPERAND_REG_IMM_FP32:
2580  break;
2581  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2582  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2583  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2584  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2585  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2586  case AMDGPU::OPERAND_REG_INLINE_C_FP16: {
2587  const MachineOperand &MO = MI.getOperand(i);
2588  if (!MO.isReg() && (!MO.isImm() || !isInlineConstant(MI, i))) {
2589  ErrInfo = "Illegal immediate value for operand.";
2590  return false;
2591  }
2592  break;
2593  }
2594  case MCOI::OPERAND_IMMEDIATE:
2595  case AMDGPU::OPERAND_KIMM32:
2596  // Check if this operand is an immediate.
2597  // FrameIndex operands will be replaced by immediates, so they are
2598  // allowed.
2599  if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) {
2600  ErrInfo = "Expected immediate, but got non-immediate";
2601  return false;
2602  }
2603  LLVM_FALLTHROUGH;
2604  default:
2605  continue;
2606  }
2607 
2608  if (!MI.getOperand(i).isReg())
2609  continue;
2610 
2611  if (RegClass != -1) {
2612  unsigned Reg = MI.getOperand(i).getReg();
2613  if (Reg == AMDGPU::NoRegister ||
2614  TargetRegisterInfo::isVirtualRegister(Reg))
2615  continue;
2616 
2617  const TargetRegisterClass *RC = RI.getRegClass(RegClass);
2618  if (!RC->contains(Reg)) {
2619  ErrInfo = "Operand has incorrect register class.";
2620  return false;
2621  }
2622  }
2623  }
2624 
2625  // Verify SDWA
2626  if (isSDWA(MI)) {
2627  if (!ST.hasSDWA()) {
2628  ErrInfo = "SDWA is not supported on this target";
2629  return false;
2630  }
2631 
2632  int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2633 
2634  const int OpIndicies[] = { DstIdx, Src0Idx, Src1Idx, Src2Idx };
2635 
2636  for (int OpIdx: OpIndicies) {
2637  if (OpIdx == -1)
2638  continue;
2639  const MachineOperand &MO = MI.getOperand(OpIdx);
2640 
2641  if (!ST.hasSDWAScalar()) {
2642  // Only VGPRS on VI
2643  if (!MO.isReg() || !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.getReg()))) {
2644  ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI";
2645  return false;
2646  }
2647  } else {
2648  // No immediates on GFX9
2649  if (!MO.isReg()) {
2650  ErrInfo = "Only reg allowed as operands in SDWA instructions on GFX9";
2651  return false;
2652  }
2653  }
2654  }
2655 
2656  if (!ST.hasSDWAOmod()) {
2657  // No omod allowed on VI
2658  const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod);
2659  if (OMod != nullptr &&
2660  (!OMod->isImm() || OMod->getImm() != 0)) {
2661  ErrInfo = "OMod not allowed in SDWA instructions on VI";
2662  return false;
2663  }
2664  }
2665 
2666  uint16_t BasicOpcode = AMDGPU::getBasicFromSDWAOp(Opcode);
2667  if (isVOPC(BasicOpcode)) {
2668  if (!ST.hasSDWASdst() && DstIdx != -1) {
2669  // Only vcc allowed as dst on VI for VOPC
2670  const MachineOperand &Dst = MI.getOperand(DstIdx);
2671  if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
2672  ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";
2673  return false;
2674  }
2675  } else if (!ST.hasSDWAOutModsVOPC()) {
2676  // No clamp allowed on GFX9 for VOPC
2677  const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
2678  if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
2679  ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";
2680  return false;
2681  }
2682 
2683  // No omod allowed on GFX9 for VOPC
2684  const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod);
2685  if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
2686  ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";
2687  return false;
2688  }
2689  }
2690  }
2691  }
2692 
2693  // Verify VOP*
2694  if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isVOPC(MI) || isSDWA(MI)) {
2695  // Only look at the true operands. Only a real operand can use the constant
2696  // bus, and we don't want to check pseudo-operands like the source modifier
2697  // flags.
2698  const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2699 
2700  unsigned ConstantBusCount = 0;
2701 
2702  if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1)
2703  ++ConstantBusCount;
2704 
2705  unsigned SGPRUsed = findImplicitSGPRRead(MI);
2706  if (SGPRUsed != AMDGPU::NoRegister)
2707  ++ConstantBusCount;
2708 
2709  for (int OpIdx : OpIndices) {
2710  if (OpIdx == -1)
2711  break;
2712  const MachineOperand &MO = MI.getOperand(OpIdx);
2713  if (usesConstantBus(MRI, MO, MI.getDesc().OpInfo[OpIdx])) {
2714  if (MO.isReg()) {
2715  if (MO.getReg() != SGPRUsed)
2716  ++ConstantBusCount;
2717  SGPRUsed = MO.getReg();
2718  } else {
2719  ++ConstantBusCount;
2720  }
2721  }
2722  }
2723  if (ConstantBusCount > 1) {
2724  ErrInfo = "VOP* instruction uses the constant bus more than once";
2725  return false;
2726  }
2727  }
2728 
2729  // Verify misc. restrictions on specific instructions.
2730  if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
2731  Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
2732  const MachineOperand &Src0 = MI.getOperand(Src0Idx);
2733  const MachineOperand &Src1 = MI.getOperand(Src1Idx);
2734  const MachineOperand &Src2 = MI.getOperand(Src2Idx);
2735  if (Src0.isReg() && Src1.isReg() && Src2.isReg()) {
2736  if (!compareMachineOp(Src0, Src1) &&
2737  !compareMachineOp(Src0, Src2)) {
2738  ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
2739  return false;
2740  }
2741  }
2742  }
2743 
2744  if (isSOPK(MI)) {
2745  int64_t Imm = getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm();
2746  if (sopkIsZext(MI)) {
2747  if (!isUInt<16>(Imm)) {
2748  ErrInfo = "invalid immediate for SOPK instruction";
2749  return false;
2750  }
2751  } else {
2752  if (!isInt<16>(Imm)) {
2753  ErrInfo = "invalid immediate for SOPK instruction";
2754  return false;
2755  }
2756  }
2757  }
2758 
2759  if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
2760  Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
2761  Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
2762  Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
2763  const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
2764  Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
2765 
2766  const unsigned StaticNumOps = Desc.getNumOperands() +
2767  Desc.getNumImplicitUses();
2768  const unsigned NumImplicitOps = IsDst ? 2 : 1;
2769 
2770  // Allow additional implicit operands. This allows a fixup done by the post
2771  // RA scheduler where the main implicit operand is killed and implicit-defs
2772  // are added for sub-registers that remain live after this instruction.
2773  if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
2774  ErrInfo = "missing implicit register operands";
2775  return false;
2776  }
2777 
2778  const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst);
2779  if (IsDst) {
2780  if (!Dst->isUse()) {
2781  ErrInfo = "v_movreld_b32 vdst should be a use operand";
2782  return false;
2783  }
2784 
2785  unsigned UseOpIdx;
2786  if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
2787  UseOpIdx != StaticNumOps + 1) {
2788  ErrInfo = "movrel implicit operands should be tied";
2789  return false;
2790  }
2791  }
2792 
2793  const MachineOperand &Src0 = MI.getOperand(Src0Idx);
2794  const MachineOperand &ImpUse
2795  = MI.getOperand(StaticNumOps + NumImplicitOps - 1);
2796  if (!ImpUse.isReg() || !ImpUse.isUse() ||
2797  !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
2798  ErrInfo = "src0 should be subreg of implicit vector use";
2799  return false;
2800  }
2801  }
2802 
2803  // Make sure we aren't losing exec uses in the td files. This mostly requires
2804  // being careful when using let Uses to try to add other use registers.
2805  if (shouldReadExec(MI)) {
2806  if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
2807  ErrInfo = "VALU instruction does not implicitly read exec mask";
2808  return false;
2809  }
2810  }
2811 
2812  if (isSMRD(MI)) {
2813  if (MI.mayStore()) {
2814  // The register offset form of scalar stores may only use m0 as the
2815  // soffset register.
2816  const MachineOperand *Soff = getNamedOperand(MI, AMDGPU::OpName::soff);
2817  if (Soff && Soff->getReg() != AMDGPU::M0) {
2818  ErrInfo = "scalar stores must use m0 as offset register";
2819  return false;
2820  }
2821  }
2822  }
2823 
2824  if (isFLAT(MI) && !MF->getSubtarget<SISubtarget>().hasFlatInstOffsets()) {
2825  const MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset);
2826  if (Offset->getImm() != 0) {
2827  ErrInfo = "subtarget does not support offsets in flat instructions";
2828  return false;
2829  }
2830  }
2831 
2832  return true;
2833 }
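// Standalone sketch of the constant-bus rule the verifier enforces above: a single
// VOP instruction may route at most one value over the scalar constant bus, whether
// that value is an SGPR (or another scalar such as VCC/M0) or a non-inline literal.
// The struct and function below are hypothetical.
#include <cstdint>

struct SrcUseSketch {
  bool IsSGPR;      // reads a scalar register
  bool IsLiteral;   // a constant that is not an inline constant
  uint32_t SGPRReg; // which scalar register, if IsSGPR
};

static bool violatesConstantBusLimit(const SrcUseSketch *Srcs, int NumSrcs) {
  unsigned Count = 0;
  uint32_t SGPRUsed = ~0u;
  for (int I = 0; I < NumSrcs; ++I) {
    if (Srcs[I].IsLiteral) {
      ++Count;
    } else if (Srcs[I].IsSGPR && Srcs[I].SGPRReg != SGPRUsed) {
      ++Count; // re-reading the same SGPR does not count again
      SGPRUsed = Srcs[I].SGPRReg;
    }
  }
  return Count > 1;
}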
2834 
2835 unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
2836  switch (MI.getOpcode()) {
2837  default: return AMDGPU::INSTRUCTION_LIST_END;
2838  case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
2839  case AMDGPU::COPY: return AMDGPU::COPY;
2840  case AMDGPU::PHI: return AMDGPU::PHI;
2841  case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
2842  case AMDGPU::WQM: return AMDGPU::WQM;
2843  case AMDGPU::WWM: return AMDGPU::WWM;
2844  case AMDGPU::S_MOV_B32:
2845  return MI.getOperand(1).isReg() ?
2846  AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
2847  case AMDGPU::S_ADD_I32:
2848  case AMDGPU::S_ADD_U32: return AMDGPU::V_ADD_I32_e32;
2849  case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32;
2850  case AMDGPU::S_SUB_I32:
2851  case AMDGPU::S_SUB_U32: return AMDGPU::V_SUB_I32_e32;
2852  case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
2853  case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32;
2854  case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
2855  case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
2856  case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
2857  case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64;
2858  case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64;
2859  case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64;
2860  case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64;
2861  case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
2862  case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
2863  case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
2864  case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
2865  case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
2866  case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
2867  case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32;
2868  case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
2869  case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
2870  case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
2871  case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
2872  case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
2873  case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
2874  case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
2875  case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
2876  case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
2877  case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
2878  case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32;
2879  case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32;
2880  case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32;
2881  case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e32;
2882  case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e32;
2883  case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e32;
2884  case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e32;
2885  case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e32;
2886  case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e32;
2887  case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e32;
2888  case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e32;
2889  case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
2890  case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
2891  case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
2892  case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
2893  case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ;
2894  case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ;
2895  }
2896 }
2897 
2898 bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const {
2899  return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END;
2900 }
2901 
2902 const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
2903  unsigned OpNo) const {
2904  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2905  const MCInstrDesc &Desc = get(MI.getOpcode());
2906  if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
2907  Desc.OpInfo[OpNo].RegClass == -1) {
2908  unsigned Reg = MI.getOperand(OpNo).getReg();
2909 
2910  if (TargetRegisterInfo::isVirtualRegister(Reg))
2911  return MRI.getRegClass(Reg);
2912  return RI.getPhysRegClass(Reg);
2913  }
2914 
2915  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
2916  return RI.getRegClass(RCID);
2917 }
2918 
2919 bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
2920  switch (MI.getOpcode()) {
2921  case AMDGPU::COPY:
2922  case AMDGPU::REG_SEQUENCE:
2923  case AMDGPU::PHI:
2924  case AMDGPU::INSERT_SUBREG:
2925  return RI.hasVGPRs(getOpRegClass(MI, 0));
2926  default:
2927  return RI.hasVGPRs(getOpRegClass(MI, OpNo));
2928  }
2929 }
2930 
2931 void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const {
2932  MachineBasicBlock::iterator I = MI;
2933  MachineBasicBlock *MBB = MI.getParent();
2934  MachineOperand &MO = MI.getOperand(OpIdx);
2935  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
2936  unsigned RCID = get(MI.getOpcode()).OpInfo[OpIdx].RegClass;
2937  const TargetRegisterClass *RC = RI.getRegClass(RCID);
2938  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
2939  if (MO.isReg())
2940  Opcode = AMDGPU::COPY;
2941  else if (RI.isSGPRClass(RC))
2942  Opcode = AMDGPU::S_MOV_B32;
2943 
2944  const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
2945  if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC))
2946  VRC = &AMDGPU::VReg_64RegClass;
2947  else
2948  VRC = &AMDGPU::VGPR_32RegClass;
2949 
2950  unsigned Reg = MRI.createVirtualRegister(VRC);
2951  DebugLoc DL = MBB->findDebugLoc(I);
2952  BuildMI(*MI.getParent(), I, DL, get(Opcode), Reg).add(MO);
2953  MO.ChangeToRegister(Reg, false);
2954 }
2955 
2956 unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
2957  MachineRegisterInfo &MRI,
2958  MachineOperand &SuperReg,
2959  const TargetRegisterClass *SuperRC,
2960  unsigned SubIdx,
2961  const TargetRegisterClass *SubRC)
2962  const {
2963  MachineBasicBlock *MBB = MI->getParent();
2964  DebugLoc DL = MI->getDebugLoc();
2965  unsigned SubReg = MRI.createVirtualRegister(SubRC);
2966 
2967  if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) {
2968  BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
2969  .addReg(SuperReg.getReg(), 0, SubIdx);
2970  return SubReg;
2971  }
2972 
2973  // Just in case the super register is itself a sub-register, copy it to a new
2974  // value so we don't need to worry about merging its subreg index with the
2975  // SubIdx passed to this function. The register coalescer should be able to
2976  // eliminate this extra copy.
2977  unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
2978 
2979  BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg)
2980  .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg());
2981 
2982  BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
2983  .addReg(NewSuperReg, 0, SubIdx);
2984 
2985  return SubReg;
2986 }
2987 
2988 MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
2989  MachineBasicBlock::iterator MII,
2990  MachineRegisterInfo &MRI,
2991  MachineOperand &Op,
2992  const TargetRegisterClass *SuperRC,
2993  unsigned SubIdx,
2994  const TargetRegisterClass *SubRC) const {
2995  if (Op.isImm()) {
2996  if (SubIdx == AMDGPU::sub0)
2997  return MachineOperand::CreateImm(static_cast<int32_t>(Op.getImm()));
2998  if (SubIdx == AMDGPU::sub1)
2999  return MachineOperand::CreateImm(static_cast<int32_t>(Op.getImm() >> 32));
3000 
3001  llvm_unreachable("Unhandled register index for immediate");
3002  }
3003 
3004  unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC,
3005  SubIdx, SubRC);
3006  return MachineOperand::CreateReg(SubReg, false);
3007 }
3008 
3009 // Change the order of operands from (0, 1, 2) to (0, 2, 1)
3010 void SIInstrInfo::swapOperands(MachineInstr &Inst) const {
3011  assert(Inst.getNumExplicitOperands() == 3);
3012  MachineOperand Op1 = Inst.getOperand(1);
3013  Inst.RemoveOperand(1);
3014  Inst.addOperand(Op1);
3015 }
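// Why the (0, 1, 2) -> (0, 2, 1) swap above exists, as a standalone sketch: on
// VI-class targets the VALU shifts are the *REV forms, which take the shift amount
// first, so "s_lshl_b32 dst, val, amt" becomes "v_lshlrev_b32 dst, amt, val" (see
// the S_LSHL/S_LSHR/S_ASHR cases in the moveToVALU worklist loop further down).
// The helpers below are hypothetical models of the two operand orders.
#include <cstdint>

static uint32_t s_lshl_b32_sketch(uint32_t Val, uint32_t Amt) { return Val << Amt; }
static uint32_t v_lshlrev_b32_sketch(uint32_t Amt, uint32_t Val) { return Val << Amt; }
// Both forms compute the same result once the operands are swapped:
// s_lshl_b32_sketch(1u, 4u) == v_lshlrev_b32_sketch(4u, 1u) == 0x10.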
3016 
3017 bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI,
3018  const MCOperandInfo &OpInfo,
3019  const MachineOperand &MO) const {
3020  if (!MO.isReg())
3021  return false;
3022 
3023  unsigned Reg = MO.getReg();
3024  const TargetRegisterClass *RC =
3025  TargetRegisterInfo::isVirtualRegister(Reg) ?
3026  MRI.getRegClass(Reg) :
3027  RI.getPhysRegClass(Reg);
3028 
3029  const SIRegisterInfo *TRI =
3030  static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
3031  RC = TRI->getSubRegClass(RC, MO.getSubReg());
3032 
3033  // In order to be legal, the common sub-class must be equal to the
3034  // class of the current operand. For example:
3035  //
3036  // v_mov_b32 s0 ; Operand defined as vsrc_b32
3037  // ; RI.getCommonSubClass(s0,vsrc_b32) = sgpr ; LEGAL
3038  //
3039  // s_sendmsg 0, s0 ; Operand defined as m0reg
3040  // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
3041 
3042  return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
3043 }
3044 
3045 bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
3046  const MCOperandInfo &OpInfo,
3047  const MachineOperand &MO) const {
3048  if (MO.isReg())
3049  return isLegalRegOperand(MRI, OpInfo, MO);
3050 
3051  // Handle non-register types that are treated like immediates.
3052  assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
3053  return true;
3054 }
3055 
3056 bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
3057  const MachineOperand *MO) const {
3058  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
3059  const MCInstrDesc &InstDesc = MI.getDesc();
3060  const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
3061  const TargetRegisterClass *DefinedRC =
3062  OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr;
3063  if (!MO)
3064  MO = &MI.getOperand(OpIdx);
3065 
3066  if (isVALU(MI) && usesConstantBus(MRI, *MO, OpInfo)) {
3067 
3068  RegSubRegPair SGPRUsed;
3069  if (MO->isReg())
3070  SGPRUsed = RegSubRegPair(MO->getReg(), MO->getSubReg());
3071 
3072  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
3073  if (i == OpIdx)
3074  continue;
3075  const MachineOperand &Op = MI.getOperand(i);
3076  if (Op.isReg()) {
3077  if ((Op.getReg() != SGPRUsed.Reg || Op.getSubReg() != SGPRUsed.SubReg) &&
3078  usesConstantBus(MRI, Op, InstDesc.OpInfo[i])) {
3079  return false;
3080  }
3081  } else if (InstDesc.OpInfo[i].OperandType == AMDGPU::OPERAND_KIMM32) {
3082  return false;
3083  }
3084  }
3085  }
3086 
3087  if (MO->isReg()) {
3088  assert(DefinedRC);
3089  return isLegalRegOperand(MRI, OpInfo, *MO);
3090  }
3091 
3092  // Handle non-register types that are treated like immediates.
3093  assert(MO->isImm() || MO->isTargetIndex() || MO->isFI());
3094 
3095  if (!DefinedRC) {
3096  // This operand expects an immediate.
3097  return true;
3098  }
3099 
3100  return isImmOperandLegal(MI, OpIdx, *MO);
3101 }
3102 
3103 void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
3104  MachineInstr &MI) const {
3105  unsigned Opc = MI.getOpcode();
3106  const MCInstrDesc &InstrDesc = get(Opc);
3107 
3108  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
3109  MachineOperand &Src1 = MI.getOperand(Src1Idx);
3110 
3111  // If there is an implicit SGPR use such as VCC use for v_addc_u32/v_subb_u32
3112  // we need to only have one constant bus use.
3113  //
3114  // Note we do not need to worry about literal constants here. They are
3115  // disabled for the operand type for instructions because they will always
3116  // violate the one constant bus use rule.
3117  bool HasImplicitSGPR = findImplicitSGPRRead(MI) != AMDGPU::NoRegister;
3118  if (HasImplicitSGPR) {
3119  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3120  MachineOperand &Src0 = MI.getOperand(Src0Idx);
3121 
3122  if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg()))
3123  legalizeOpWithMove(MI, Src0Idx);
3124  }
3125 
3126  // VOP2 src0 instructions support all operand types, so we don't need to check
3127  // their legality. If src1 is already legal, we don't need to do anything.
3128  if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1))
3129  return;
3130 
3131  // Special case: V_READLANE_B32 accepts only immediate or SGPR operands for
3132  // lane select. Fix up using V_READFIRSTLANE, since we assume that the lane
3133  // select is uniform.
3134  if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() &&
3135  RI.isVGPR(MRI, Src1.getReg())) {
3136  unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
3137  const DebugLoc &DL = MI.getDebugLoc();
3138  BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
3139  .add(Src1);
3140  Src1.ChangeToRegister(Reg, false);
3141  return;
3142  }
3143 
3144  // We do not use commuteInstruction here because it is too aggressive and will
3145  // commute if it is possible. We only want to commute here if it improves
3146  // legality. This can be called a fairly large number of times so don't waste
3147  // compile time pointlessly swapping and checking legality again.
3148  if (HasImplicitSGPR || !MI.isCommutable()) {
3149  legalizeOpWithMove(MI, Src1Idx);
3150  return;
3151  }
3152 
3153  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3154  MachineOperand &Src0 = MI.getOperand(Src0Idx);
3155 
3156  // If src0 can be used as src1, commuting will make the operands legal.
3157  // Otherwise we have to give up and insert a move.
3158  //
3159  // TODO: Other immediate-like operand kinds could be commuted if there was a
3160  // MachineOperand::ChangeTo* for them.
3161  if ((!Src1.isImm() && !Src1.isReg()) ||
3162  !isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0)) {
3163  legalizeOpWithMove(MI, Src1Idx);
3164  return;
3165  }
3166 
3167  int CommutedOpc = commuteOpcode(MI);
3168  if (CommutedOpc == -1) {
3169  legalizeOpWithMove(MI, Src1Idx);
3170  return;
3171  }
3172 
3173  MI.setDesc(get(CommutedOpc));
3174 
3175  unsigned Src0Reg = Src0.getReg();
3176  unsigned Src0SubReg = Src0.getSubReg();
3177  bool Src0Kill = Src0.isKill();
3178 
3179  if (Src1.isImm())
3180  Src0.ChangeToImmediate(Src1.getImm());
3181  else if (Src1.isReg()) {
3182  Src0.ChangeToRegister(Src1.getReg(), false, false, Src1.isKill());
3183  Src0.setSubReg(Src1.getSubReg());
3184  } else
3185  llvm_unreachable("Should only have register or immediate operands");
3186 
3187  Src1.ChangeToRegister(Src0Reg, false, false, Src0Kill);
3188  Src1.setSubReg(Src0SubReg);
3189 }
3190 
3191 // Legalize VOP3 operands. Because all operand types are supported for any
3192 // operand, and since literal constants are not allowed and should never be
3193 // seen, we only need to worry about inserting copies if we use multiple SGPR
3194 // operands.
3195 void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
3196  MachineInstr &MI) const {
3197  unsigned Opc = MI.getOpcode();
3198 
3199  int VOP3Idx[3] = {
3200  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
3201  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1),
3202  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)
3203  };
3204 
3205  // Find the one SGPR operand we are allowed to use.
3206  unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);
3207 
3208  for (unsigned i = 0; i < 3; ++i) {
3209  int Idx = VOP3Idx[i];
3210  if (Idx == -1)
3211  break;
3212  MachineOperand &MO = MI.getOperand(Idx);
3213 
3214  // We should never see a VOP3 instruction with an illegal immediate operand.
3215  if (!MO.isReg())
3216  continue;
3217 
3218  if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
3219  continue; // VGPRs are legal
3220 
3221  if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
3222  SGPRReg = MO.getReg();
3223  // We can use one SGPR in each VOP3 instruction.
3224  continue;
3225  }
3226 
3227  // If we make it this far, then the operand is not legal and we must
3228  // legalize it.
3229  legalizeOpWithMove(MI, Idx);
3230  }
3231 }
3232 
3233 unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI,
3234  MachineRegisterInfo &MRI) const {
3235  const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg);
3236  const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC);
3237  unsigned DstReg = MRI.createVirtualRegister(SRC);
3238  unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
3239 
3240  SmallVector<unsigned, 8> SRegs;
3241  for (unsigned i = 0; i < SubRegs; ++i) {
3242  unsigned SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
3243  BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
3244  get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
3245  .addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
3246  SRegs.push_back(SGPR);
3247  }
3248 
3249  MachineInstrBuilder MIB =
3250  BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
3251  get(AMDGPU::REG_SEQUENCE), DstReg);
3252  for (unsigned i = 0; i < SubRegs; ++i) {
3253  MIB.addReg(SRegs[i]);
3254  MIB.addImm(RI.getSubRegFromChannel(i));
3255  }
3256  return DstReg;
3257 }
3258 
3259 void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI,
3260  MachineInstr &MI) const {
3261 
3262  // If the pointer is stored in VGPRs, then we need to move it to
3263  // SGPRs using v_readfirstlane. This is safe because we only select
3264  // loads with uniform pointers to SMRD instruction so we know the
3265  // pointer value is uniform.
3266  MachineOperand *SBase = getNamedOperand(MI, AMDGPU::OpName::sbase);
3267  if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) {
3268  unsigned SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI);
3269  SBase->setReg(SGPR);
3270  }
3271 }
3272 
3273 void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
3274  MachineBasicBlock::iterator I,
3275  const TargetRegisterClass *DstRC,
3276  MachineOperand &Op,
3277  MachineRegisterInfo &MRI,
3278  const DebugLoc &DL) const {
3279  unsigned OpReg = Op.getReg();
3280  unsigned OpSubReg = Op.getSubReg();
3281 
3282  const TargetRegisterClass *OpRC = RI.getSubClassWithSubReg(
3283  RI.getRegClassForReg(MRI, OpReg), OpSubReg);
3284 
3285  // Check if operand is already the correct register class.
3286  if (DstRC == OpRC)
3287  return;
3288 
3289  unsigned DstReg = MRI.createVirtualRegister(DstRC);
3290  MachineInstr *Copy =
3291  BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg).add(Op);
3292 
3293  Op.setReg(DstReg);
3294  Op.setSubReg(0);
3295 
3296  MachineInstr *Def = MRI.getVRegDef(OpReg);
3297  if (!Def)
3298  return;
3299 
3300  // Try to eliminate the copy if it is copying an immediate value.
3301  if (Def->isMoveImmediate())
3302  FoldImmediate(*Copy, *Def, OpReg, &MRI);
3303 }
3304 
3305 void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
3306  MachineFunction &MF = *MI.getParent()->getParent();
3307  MachineRegisterInfo &MRI = MF.getRegInfo();
3308 
3309  // Legalize VOP2
3310  if (isVOP2(MI) || isVOPC(MI)) {
3311  legalizeOperandsVOP2(MRI, MI);
3312  return;
3313  }
3314 
3315  // Legalize VOP3
3316  if (isVOP3(MI)) {
3317  legalizeOperandsVOP3(MRI, MI);
3318  return;
3319  }
3320 
3321  // Legalize SMRD
3322  if (isSMRD(MI)) {
3323  legalizeOperandsSMRD(MRI, MI);
3324  return;
3325  }
3326 
3327  // Legalize REG_SEQUENCE and PHI
3328  // The register class of the operands must be the same type as the register
3329  // class of the output.
3330  if (MI.getOpcode() == AMDGPU::PHI) {
3331  const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
3332  for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
3333  if (!MI.getOperand(i).isReg() ||
3334  !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
3335  continue;
3336  const TargetRegisterClass *OpRC =
3337  MRI.getRegClass(MI.getOperand(i).getReg());
3338  if (RI.hasVGPRs(OpRC)) {
3339  VRC = OpRC;
3340  } else {
3341  SRC = OpRC;
3342  }
3343  }
3344 
3345  // If any of the operands are VGPR registers, then they all must be
3346  // VGPRs; otherwise we will create illegal VGPR->SGPR copies when
3347  // legalizing them.
3348  if (VRC || !RI.isSGPRClass(getOpRegClass(MI, 0))) {
3349  if (!VRC) {
3350  assert(SRC);
3351  VRC = RI.getEquivalentVGPRClass(SRC);
3352  }
3353  RC = VRC;
3354  } else {
3355  RC = SRC;
3356  }
3357 
3358  // Update all the operands so they have the same type.
3359  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
3360  MachineOperand &Op = MI.getOperand(I);
3361  if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
3362  continue;
3363 
3364  // MI is a PHI instruction.
3365  MachineBasicBlock *InsertBB = MI.getOperand(I + 1).getMBB();
3366  MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator();
3367 
3368  // Avoid creating no-op copies with the same src and dst reg class. These
3369  // confuse some of the machine passes.
3370  legalizeGenericOperand(*InsertBB, Insert, RC, Op, MRI, MI.getDebugLoc());
3371  }
3372  }
3373 
3374  // REG_SEQUENCE doesn't really require operand legalization, but if one has a
3375  // VGPR dest type and SGPR sources, insert copies so all operands are
3376  // VGPRs. This seems to help operand folding / the register coalescer.
3377  if (MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
3378  MachineBasicBlock *MBB = MI.getParent();
3379  const TargetRegisterClass *DstRC = getOpRegClass(MI, 0);
3380  if (RI.hasVGPRs(DstRC)) {
3381  // Update all the operands so they are VGPR register classes. These may
3382  // not be the same register class because REG_SEQUENCE supports mixing
3383  // subregister index types e.g. sub0_sub1 + sub2 + sub3
3384  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
3385  MachineOperand &Op = MI.getOperand(I);
3386  if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
3387  continue;
3388 
3389  const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg());
3390  const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(OpRC);
3391  if (VRC == OpRC)
3392  continue;
3393 
3394  legalizeGenericOperand(*MBB, MI, VRC, Op, MRI, MI.getDebugLoc());
3395  Op.setIsKill();
3396  }
3397  }
3398 
3399  return;
3400  }
3401 
3402  // Legalize INSERT_SUBREG
3403  // src0 must have the same register class as dst
3404  if (MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
3405  unsigned Dst = MI.getOperand(0).getReg();
3406  unsigned Src0 = MI.getOperand(1).getReg();
3407  const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
3408  const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
3409  if (DstRC != Src0RC) {
3410  MachineBasicBlock *MBB = MI.getParent();
3411  MachineOperand &Op = MI.getOperand(1);
3412  legalizeGenericOperand(*MBB, MI, DstRC, Op, MRI, MI.getDebugLoc());
3413  }
3414  return;
3415  }
3416 
3417  // Legalize MIMG and MUBUF/MTBUF for shaders.
3418  //
3419  // Shaders only generate MUBUF/MTBUF instructions via intrinsics or via
3420  // scratch memory access. In both cases, the legalization never involves
3421  // conversion to the addr64 form.
3422  if (isMIMG(MI) ||
3423  (AMDGPU::isShader(MF.getFunction()->getCallingConv()) &&
3424  (isMUBUF(MI) || isMTBUF(MI)))) {
3425  MachineOperand *SRsrc = getNamedOperand(MI, AMDGPU::OpName::srsrc);
3426  if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg()))) {
3427  unsigned SGPR = readlaneVGPRToSGPR(SRsrc->getReg(), MI, MRI);
3428  SRsrc->setReg(SGPR);
3429  }
3430 
3431  MachineOperand *SSamp = getNamedOperand(MI, AMDGPU::OpName::ssamp);
3432  if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg()))) {
3433  unsigned SGPR = readlaneVGPRToSGPR(SSamp->getReg(), MI, MRI);
3434  SSamp->setReg(SGPR);
3435  }
3436  return;
3437  }
3438 
3439  // Legalize MUBUF* instructions by converting to addr64 form.
3440  // FIXME: If we start using the non-addr64 instructions for compute, we
3441  // may need to legalize them as above. This especially applies to the
3442  // buffer_load_format_* variants and variants with idxen (or bothen).
3443  int SRsrcIdx =
3444  AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
3445  if (SRsrcIdx != -1) {
3446  // We have an MUBUF instruction
3447  MachineOperand *SRsrc = &MI.getOperand(SRsrcIdx);
3448  unsigned SRsrcRC = get(MI.getOpcode()).OpInfo[SRsrcIdx].RegClass;
3449  if (RI.getCommonSubClass(MRI.getRegClass(SRsrc->getReg()),
3450  RI.getRegClass(SRsrcRC))) {
3451  // The operands are legal.
3452  // FIXME: We may need to legalize operands besides srsrc.
3453  return;
3454  }
3455 
3456  MachineBasicBlock &MBB = *MI.getParent();
3457 
3458  // Extract the ptr from the resource descriptor.
3459  unsigned SRsrcPtr = buildExtractSubReg(MI, MRI, *SRsrc,
3460  &AMDGPU::VReg_128RegClass, AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
3461 
3462  // Create an empty resource descriptor
3463  unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
3464  unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
3465  unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
3466  unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
3467  uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();
3468 
3469  // Zero64 = 0
3470  BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B64), Zero64)
3471  .addImm(0);
3472 
3473  // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
3474  BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B32), SRsrcFormatLo)
3475  .addImm(RsrcDataFormat & 0xFFFFFFFF);
3476 
3477  // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
3478  BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B32), SRsrcFormatHi)
3479  .addImm(RsrcDataFormat >> 32);
3480 
3481  // NewSRsrc = {Zero64, SRsrcFormat}
3482  BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewSRsrc)
3483  .addReg(Zero64)
3484  .addImm(AMDGPU::sub0_sub1)
3485  .addReg(SRsrcFormatLo)
3486  .addImm(AMDGPU::sub2)
3487  .addReg(SRsrcFormatHi)
3488  .addImm(AMDGPU::sub3);
3489 
3490  MachineOperand *VAddr = getNamedOperand(MI, AMDGPU::OpName::vaddr);
3491  unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
3492  if (VAddr) {
3493  // This is already an ADDR64 instruction so we need to add the pointer
3494  // extracted from the resource descriptor to the current value of VAddr.
3495  unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3496  unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3497 
3498  // NewVaddrLo = SRsrcPtr:sub0 + VAddr:sub0
3499  DebugLoc DL = MI.getDebugLoc();
3500  BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo)
3501  .addReg(SRsrcPtr, 0, AMDGPU::sub0)
3502  .addReg(VAddr->getReg(), 0, AMDGPU::sub0);
3503 
3504  // NewVaddrHi = SRsrcPtr:sub1 + VAddr:sub1
3505  BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e32), NewVAddrHi)
3506  .addReg(SRsrcPtr, 0, AMDGPU::sub1)
3507  .addReg(VAddr->getReg(), 0, AMDGPU::sub1);
3508 
3509  // NewVaddr = {NewVaddrHi, NewVaddrLo}
3510  BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
3511  .addReg(NewVAddrLo)
3512  .addImm(AMDGPU::sub0)
3513  .addReg(NewVAddrHi)
3514  .addImm(AMDGPU::sub1);
3515  } else {
3516  // This instruction is the _OFFSET variant, so we need to convert it to
3517  // ADDR64.
3518  assert(MBB.getParent()->getSubtarget<SISubtarget>().getGeneration()
3520  "FIXME: Need to emit flat atomics here");
3521 
3522  MachineOperand *VData = getNamedOperand(MI, AMDGPU::OpName::vdata);
3523  MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset);
3524  MachineOperand *SOffset = getNamedOperand(MI, AMDGPU::OpName::soffset);
3525  unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI.getOpcode());
3526 
3527  // Atomics with return have an additional tied operand and are
3528  // missing some of the special bits.
3529  MachineOperand *VDataIn = getNamedOperand(MI, AMDGPU::OpName::vdata_in);
3530  MachineInstr *Addr64;
3531 
3532  if (!VDataIn) {
3533  // Regular buffer load / store.
3534  MachineInstrBuilder MIB =
3535  BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode))
3536  .add(*VData)
3537  .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
3538  // This will be replaced later
3539  // with the new value of vaddr.
3540  .add(*SRsrc)
3541  .add(*SOffset)
3542  .add(*Offset);
3543 
3544  // Atomics do not have this operand.
3545  if (const MachineOperand *GLC =
3546  getNamedOperand(MI, AMDGPU::OpName::glc)) {
3547  MIB.addImm(GLC->getImm());
3548  }
3549 
3550  MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc));
3551 
3552  if (const MachineOperand *TFE =
3553  getNamedOperand(MI, AMDGPU::OpName::tfe)) {
3554  MIB.addImm(TFE->getImm());
3555  }
3556 
3557  MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
3558  Addr64 = MIB;
3559  } else {
3560  // Atomics with return.
3561  Addr64 = BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode))
3562  .add(*VData)
3563  .add(*VDataIn)
3564  .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
3565  // This will be replaced later
3566  // with the new value of vaddr.
3567  .add(*SRsrc)
3568  .add(*SOffset)
3569  .add(*Offset)
3570  .addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc))
3571  .setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
3572  }
3573 
3574  MI.removeFromParent();
3575 
3576  // NewVaddr = SRsrcPtr, i.e. the pointer extracted from the resource descriptor
3577  BuildMI(MBB, Addr64, Addr64->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
3578  NewVAddr)
3579  .addReg(SRsrcPtr, 0, AMDGPU::sub0)
3580  .addImm(AMDGPU::sub0)
3581  .addReg(SRsrcPtr, 0, AMDGPU::sub1)
3582  .addImm(AMDGPU::sub1);
3583 
3584  VAddr = getNamedOperand(*Addr64, AMDGPU::OpName::vaddr);
3585  SRsrc = getNamedOperand(*Addr64, AMDGPU::OpName::srsrc);
3586  }
3587 
3588  // Update the instruction to use NewVaddr
3589  VAddr->setReg(NewVAddr);
3590  // Update the instruction to use NewSRsrc
3591  SRsrc->setReg(NewSRsrc);
3592  }
3593 }
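// A standalone sketch of the 128-bit descriptor layout that the REG_SEQUENCEs
// above assemble: dwords 0-1 carry the (here zeroed) base pointer and
// dwords 2-3 carry the default resource data format. buildRsrcWords is an
// illustrative helper, not an LLVM API.
#include <array>
#include <cstdint>

static std::array<uint32_t, 4> buildRsrcWords(uint64_t BasePtr,
                                              uint64_t RsrcDataFormat) {
  return {static_cast<uint32_t>(BasePtr),               // sub0: pointer [31:0]
          static_cast<uint32_t>(BasePtr >> 32),         // sub1: pointer [63:32]
          static_cast<uint32_t>(RsrcDataFormat),        // sub2: format  [31:0]
          static_cast<uint32_t>(RsrcDataFormat >> 32)}; // sub3: format  [63:32]
}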
3594 
3595 void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
3596  SetVectorType Worklist;
3597  Worklist.insert(&TopInst);
3598 
3599  while (!Worklist.empty()) {
3600  MachineInstr &Inst = *Worklist.pop_back_val();
3601  MachineBasicBlock *MBB = Inst.getParent();
3602  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
3603 
3604  unsigned Opcode = Inst.getOpcode();
3605  unsigned NewOpcode = getVALUOp(Inst);
3606 
3607  // Handle some special cases
3608  switch (Opcode) {
3609  default:
3610  break;
3611  case AMDGPU::S_ADD_U64_PSEUDO:
3612  case AMDGPU::S_SUB_U64_PSEUDO:
3613  splitScalar64BitAddSub(Worklist, Inst);
3614  Inst.eraseFromParent();
3615  continue;
3616  case AMDGPU::S_AND_B64:
3617  splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_AND_B32_e64);
3618  Inst.eraseFromParent();
3619  continue;
3620 
3621  case AMDGPU::S_OR_B64:
3622  splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_OR_B32_e64);
3623  Inst.eraseFromParent();
3624  continue;
3625 
3626  case AMDGPU::S_XOR_B64:
3627  splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_XOR_B32_e64);
3628  Inst.eraseFromParent();
3629  continue;
3630 
3631  case AMDGPU::S_NOT_B64:
3632  splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::V_NOT_B32_e32);
3633  Inst.eraseFromParent();
3634  continue;
3635 
3636  case AMDGPU::S_BCNT1_I32_B64:
3637  splitScalar64BitBCNT(Worklist, Inst);
3638  Inst.eraseFromParent();
3639  continue;
3640 
3641  case AMDGPU::S_BFE_I64:
3642  splitScalar64BitBFE(Worklist, Inst);
3643  Inst.eraseFromParent();
3644  continue;
3645 
3646  case AMDGPU::S_LSHL_B32:
3647  if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
3648  NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
3649  swapOperands(Inst);
3650  }
3651  break;
3652  case AMDGPU::S_ASHR_I32:
3653  if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
3654  NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
3655  swapOperands(Inst);
3656  }
3657  break;
3658  case AMDGPU::S_LSHR_B32:
3659  if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
3660  NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
3661  swapOperands(Inst);
3662  }
3663  break;
3664  case AMDGPU::S_LSHL_B64:
3665  if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
3666  NewOpcode = AMDGPU::V_LSHLREV_B64;
3667  swapOperands(Inst);
3668  }
3669  break;
3670  case AMDGPU::S_ASHR_I64:
3671  if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
3672  NewOpcode = AMDGPU::V_ASHRREV_I64;
3673  swapOperands(Inst);
3674  }
3675  break;
3676  case AMDGPU::S_LSHR_B64:
3677  if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
3678  NewOpcode = AMDGPU::V_LSHRREV_B64;
3679  swapOperands(Inst);
3680  }
3681  break;
3682 
3683  case AMDGPU::S_ABS_I32:
3684  lowerScalarAbs(Worklist, Inst);
3685  Inst.eraseFromParent();
3686  continue;
3687 
3688  case AMDGPU::S_CBRANCH_SCC0:
3689  case AMDGPU::S_CBRANCH_SCC1:
3690  // Clear unused bits of vcc
3691  BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(AMDGPU::S_AND_B64),
3692  AMDGPU::VCC)
3693  .addReg(AMDGPU::EXEC)
3694  .addReg(AMDGPU::VCC);
3695  break;
3696 
3697  case AMDGPU::S_BFE_U64:
3698  case AMDGPU::S_BFM_B64:
3699  llvm_unreachable("Moving this op to VALU not implemented");
3700 
3701  case AMDGPU::S_PACK_LL_B32_B16:
3702  case AMDGPU::S_PACK_LH_B32_B16:
3703  case AMDGPU::S_PACK_HH_B32_B16:
3704  movePackToVALU(Worklist, MRI, Inst);
3705  Inst.eraseFromParent();
3706  continue;
3707 
3708  case AMDGPU::S_XNOR_B32:
3709  lowerScalarXnor(Worklist, Inst);
3710  Inst.eraseFromParent();
3711  continue;
3712 
3713  case AMDGPU::S_XNOR_B64:
3714  splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32);
3715  Inst.eraseFromParent();
3716  continue;
3717 
3718  case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR: {
3719  unsigned VDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3720  const MachineOperand *VAddr = getNamedOperand(Inst, AMDGPU::OpName::soff);
3721  auto Add = MRI.getUniqueVRegDef(VAddr->getReg());
3722  unsigned Offset = 0;
3723 
3724  // See if we can extract an immediate offset by recognizing one of these:
3725  // V_ADD_I32_e32 dst, imm, src1
3726  // V_ADD_I32_e32 dst, (S_MOV_B32 imm), src1
3727  // V_ADD will be removed by "Remove dead machine instructions".
3728  if (Add && Add->getOpcode() == AMDGPU::V_ADD_I32_e32) {
3729  const MachineOperand *Src =
3730  getNamedOperand(*Add, AMDGPU::OpName::src0);
3731 
3732  if (Src && Src->isReg()) {
3733  auto Mov = MRI.getUniqueVRegDef(Src->getReg());
3734  if (Mov && Mov->getOpcode() == AMDGPU::S_MOV_B32)
3735  Src = &Mov->getOperand(1);
3736  }
3737 
3738  if (Src) {
3739  if (Src->isImm())
3740  Offset = Src->getImm();
3741  else if (Src->isCImm())
3742  Offset = Src->getCImm()->getZExtValue();
3743  }
3744 
3745  if (Offset && isLegalMUBUFImmOffset(Offset))
3746  VAddr = getNamedOperand(*Add, AMDGPU::OpName::src1);
3747  else
3748  Offset = 0;
3749  }
3750 
3751  BuildMI(*MBB, Inst, Inst.getDebugLoc(),
3752  get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), VDst)
3753  .add(*VAddr) // vaddr
3754  .add(*getNamedOperand(Inst, AMDGPU::OpName::sbase)) // srsrc
3755  .addImm(0) // soffset
3756  .addImm(Offset) // offset
3757  .addImm(getNamedOperand(Inst, AMDGPU::OpName::glc)->getImm())
3758  .addImm(0) // slc
3759  .addImm(0) // tfe
3760  .setMemRefs(Inst.memoperands_begin(), Inst.memoperands_end());
3761 
3762  MRI.replaceRegWith(getNamedOperand(Inst, AMDGPU::OpName::sdst)->getReg(),
3763  VDst);
3764  addUsersToMoveToVALUWorklist(VDst, MRI, Worklist);
3765  Inst.eraseFromParent();
3766  continue;
3767  }
3768  }
3769 
3770  if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
3771  // We cannot move this instruction to the VALU, so we should try to
3772  // legalize its operands instead.
3773  legalizeOperands(Inst);
3774  continue;
3775  }
3776 
3777  // Use the new VALU Opcode.
3778  const MCInstrDesc &NewDesc = get(NewOpcode);
3779  Inst.setDesc(NewDesc);
3780 
3781  // Remove any references to SCC. Vector instructions can't read from it, and
3782  // we're just about to add the implicit use / defs of VCC, and we don't want
3783  // both.
3784  for (unsigned i = Inst.getNumOperands() - 1; i > 0; --i) {
3785  MachineOperand &Op = Inst.getOperand(i);
3786  if (Op.isReg() && Op.getReg() == AMDGPU::SCC) {
3787  Inst.RemoveOperand(i);
3788  addSCCDefUsersToVALUWorklist(Inst, Worklist);
3789  }
3790  }
3791 
3792  if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
3793  // We are converting these to a BFE, so we need to add the missing
3794  // operands for the size and offset.
3795  unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
3796  Inst.addOperand(MachineOperand::CreateImm(0));
3797  Inst.addOperand(MachineOperand::CreateImm(Size));
3798 
3799  } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
3800  // The VALU version adds the second operand to the result, so insert an
3801  // extra 0 operand.
3802  Inst.addOperand(MachineOperand::CreateImm(0));
3803  }
3804 
3805  Inst.addImplicitDefUseOperands(*Inst.getParent()->getParent());
3806 
3807  if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
3808  const MachineOperand &OffsetWidthOp = Inst.getOperand(2);
3809  // If we need to move this to VGPRs, we need to unpack the second operand
3810  // back into the 2 separate ones for bit offset and width.
3811  assert(OffsetWidthOp.isImm() &&
3812  "Scalar BFE is only implemented for constant width and offset");
3813  uint32_t Imm = OffsetWidthOp.getImm();
3814 
3815  uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
3816  uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
3817  Inst.RemoveOperand(2); // Remove old immediate.
3818  Inst.addOperand(MachineOperand::CreateImm(Offset));
3819  Inst.addOperand(MachineOperand::CreateImm(BitWidth));
3820  }
3821 
3822  bool HasDst = Inst.getOperand(0).isReg() && Inst.getOperand(0).isDef();
3823  unsigned NewDstReg = AMDGPU::NoRegister;
3824  if (HasDst) {
3825  unsigned DstReg = Inst.getOperand(0).getReg();
3826  if (TargetRegisterInfo::isPhysicalRegister(DstReg))
3827  continue;
3828 
3829  // Update the destination register class.
3830  const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst);
3831  if (!NewDstRC)
3832  continue;
3833 
3834  if (Inst.isCopy() &&
3835  TargetRegisterInfo::isVirtualRegister(Inst.getOperand(1).getReg()) &&
3836  NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) {
3837  // Instead of creating a copy where src and dst are the same register
3838  // class, we just replace all uses of dst with src. These kinds of
3839  // copies interfere with the heuristics MachineSink uses to decide
3840  // whether or not to split a critical edge, since that pass assumes
3841  // that copies will end up as machine instructions and not be
3842  // eliminated.
3843  addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
3844  MRI.replaceRegWith(DstReg, Inst.getOperand(1).getReg());
3845  MRI.clearKillFlags(Inst.getOperand(1).getReg());
3846  Inst.getOperand(0).setReg(DstReg);
3847  continue;
3848  }
3849 
3850  NewDstReg = MRI.createVirtualRegister(NewDstRC);
3851  MRI.replaceRegWith(DstReg, NewDstReg);
3852  }
3853 
3854  // Legalize the operands
3855  legalizeOperands(Inst);
3856 
3857  if (HasDst)
3858  addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
3859  }
3860 }
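// A minimal sketch of the scalar-BFE immediate split performed above: the
// 32-bit immediate packs the bit offset in [5:0] and the field width in
// [22:16], and the VALU form needs them as two separate operands.
// unpackBFEImm is illustrative, not an LLVM helper.
#include <cstdint>
#include <utility>

static std::pair<uint32_t, uint32_t> unpackBFEImm(uint32_t Imm) {
  uint32_t Offset = Imm & 0x3f;            // bits [5:0]
  uint32_t Width = (Imm & 0x7f0000) >> 16; // bits [22:16]
  return {Offset, Width};
}
// e.g. unpackBFEImm(0x100010) yields {16, 16}: extract 16 bits starting at bit 16.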
3861 
3862 void SIInstrInfo::lowerScalarAbs(SetVectorType &Worklist,
3863  MachineInstr &Inst) const {
3864  MachineBasicBlock &MBB = *Inst.getParent();
3865  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3866  MachineBasicBlock::iterator MII = Inst;
3867  DebugLoc DL = Inst.getDebugLoc();
3868 
3869  MachineOperand &Dest = Inst.getOperand(0);
3870  MachineOperand &Src = Inst.getOperand(1);
3871  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3872  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3873 
3874  BuildMI(MBB, MII, DL, get(AMDGPU::V_SUB_I32_e32), TmpReg)
3875  .addImm(0)
3876  .addReg(Src.getReg());
3877 
3878  BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg)
3879  .addReg(Src.getReg())
3880  .addReg(TmpReg);
3881 
3882  MRI.replaceRegWith(Dest.getReg(), ResultReg);
3883  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
3884 }
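// The lowering above computes |x| as max(x, 0 - x); the same identity in plain
// C++, with the wrapping 32-bit subtraction V_SUB_I32 performs:
#include <algorithm>
#include <cstdint>

static int32_t absViaMax(int32_t X) {
  int32_t Neg = static_cast<int32_t>(0u - static_cast<uint32_t>(X));
  return std::max(X, Neg); // signed max, matching V_MAX_I32
}
// absViaMax(-5) == 5; absViaMax(INT32_MIN) stays INT32_MIN, like the HW sequence.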
3885 
3886 void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist,
3887  MachineInstr &Inst) const {
3888  MachineBasicBlock &MBB = *Inst.getParent();
3889  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3890  MachineBasicBlock::iterator MII = Inst;
3891  const DebugLoc &DL = Inst.getDebugLoc();
3892 
3893  MachineOperand &Dest = Inst.getOperand(0);
3894  MachineOperand &Src0 = Inst.getOperand(1);
3895  MachineOperand &Src1 = Inst.getOperand(2);
3896 
3897  legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src0, MRI, DL);
3898  legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src1, MRI, DL);
3899 
3900  unsigned Xor = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3901  BuildMI(MBB, MII, DL, get(AMDGPU::V_XOR_B32_e64), Xor)
3902  .add(Src0)
3903  .add(Src1);
3904 
3905  unsigned Not = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3906  BuildMI(MBB, MII, DL, get(AMDGPU::V_NOT_B32_e64), Not)
3907  .addReg(Xor);
3908 
3909  MRI.replaceRegWith(Dest.getReg(), Not);
3910  addUsersToMoveToVALUWorklist(Not, MRI, Worklist);
3911 }
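// The expansion above implements scalar XNOR as NOT(XOR); in plain C++:
#include <cstdint>

static constexpr uint32_t xnor32(uint32_t A, uint32_t B) {
  return ~(A ^ B); // V_XOR_B32 followed by V_NOT_B32
}
static_assert(xnor32(0xffffffffu, 0xffffffffu) == 0xffffffffu,
              "x xnor x is all ones");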
3912 
3913 void SIInstrInfo::splitScalar64BitUnaryOp(
3914  SetVectorType &Worklist, MachineInstr &Inst,
3915  unsigned Opcode) const {
3916  MachineBasicBlock &MBB = *Inst.getParent();
3917  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3918 
3919  MachineOperand &Dest = Inst.getOperand(0);
3920  MachineOperand &Src0 = Inst.getOperand(1);
3921  DebugLoc DL = Inst.getDebugLoc();
3922 
3923  MachineBasicBlock::iterator MII = Inst;
3924 
3925  const MCInstrDesc &InstDesc = get(Opcode);
3926  const TargetRegisterClass *Src0RC = Src0.isReg() ?
3927  MRI.getRegClass(Src0.getReg()) :
3928  &AMDGPU::SGPR_32RegClass;
3929 
3930  const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
3931 
3932  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
3933  AMDGPU::sub0, Src0SubRC);
3934 
3935  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
3936  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
3937  const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
3938 
3939  unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
3940  BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
3941 
3942  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
3943  AMDGPU::sub1, Src0SubRC);
3944 
3945  unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
3946  BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
3947 
3948  unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
3949  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
3950  .addReg(DestSub0)
3951  .addImm(AMDGPU::sub0)
3952  .addReg(DestSub1)
3953  .addImm(AMDGPU::sub1);
3954 
3955  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
3956 
3957  // We don't need to legalizeOperands here because for a single operand, src0
3958  // will support any kind of input.
3959 
3960  // Move all users of this moved value.
3961  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
3962 }
3963 
3964 void SIInstrInfo::splitScalar64BitAddSub(
3965  SetVectorType &Worklist, MachineInstr &Inst) const {
3966  bool IsAdd = (Inst.getOpcode() == AMDGPU::S_ADD_U64_PSEUDO);
3967 
3968  MachineBasicBlock &MBB = *Inst.getParent();
3969  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3970 
3971  unsigned FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
3972  unsigned DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3973  unsigned DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3974 
3975  unsigned CarryReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
3976  unsigned DeadCarryReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
3977 
3978  MachineOperand &Dest = Inst.getOperand(0);
3979  MachineOperand &Src0 = Inst.getOperand(1);
3980  MachineOperand &Src1 = Inst.getOperand(2);
3981  const DebugLoc &DL = Inst.getDebugLoc();
3982  MachineBasicBlock::iterator MII = Inst;
3983 
3984  const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
3985  const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
3986  const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
3987  const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);
3988 
3989  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
3990  AMDGPU::sub0, Src0SubRC);
3991  MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
3992  AMDGPU::sub0, Src1SubRC);
3993 
3994 
3995  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
3996  AMDGPU::sub1, Src0SubRC);
3997  MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
3998  AMDGPU::sub1, Src1SubRC);
3999 
4000  unsigned LoOpc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
4001  MachineInstr *LoHalf =
4002  BuildMI(MBB, MII, DL, get(LoOpc), DestSub0)
4003  .addReg(CarryReg, RegState::Define)
4004  .add(SrcReg0Sub0)
4005  .add(SrcReg1Sub0);
4006 
4007  unsigned HiOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
4008  MachineInstr *HiHalf =
4009  BuildMI(MBB, MII, DL, get(HiOpc), DestSub1)
4010  .addReg(DeadCarryReg, RegState::Define | RegState::Dead)
4011  .add(SrcReg0Sub1)
4012  .add(SrcReg1Sub1)
4013  .addReg(CarryReg, RegState::Kill);
4014 
4015  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
4016  .addReg(DestSub0)
4017  .addImm(AMDGPU::sub0)
4018  .addReg(DestSub1)
4019  .addImm(AMDGPU::sub1);
4020 
4021  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
4022 
4023  // Try to legalize the operands in case we need to swap the order to keep it
4024  // valid.
4025  legalizeOperands(*LoHalf);
4026  legalizeOperands(*HiHalf);
4027 
4028  // Move all users of this moved value.
4029  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
4030 }
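// A standalone sketch of the 64-bit add split used above: the low halves go
// through V_ADD_I32 (producing a carry) and the high halves through
// V_ADDC_U32, which consumes that carry. add64ViaHalves is illustrative only.
#include <cstdint>

static uint64_t add64ViaHalves(uint64_t A, uint64_t B) {
  uint32_t ALo = static_cast<uint32_t>(A), AHi = static_cast<uint32_t>(A >> 32);
  uint32_t BLo = static_cast<uint32_t>(B), BHi = static_cast<uint32_t>(B >> 32);
  uint32_t Lo = ALo + BLo;
  uint32_t Carry = Lo < ALo ? 1u : 0u; // carry-out of the low add
  uint32_t Hi = AHi + BHi + Carry;     // carry-in to the high add
  return (static_cast<uint64_t>(Hi) << 32) | Lo;
}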
4031 
4032 void SIInstrInfo::splitScalar64BitBinaryOp(
4033  SetVectorType &Worklist, MachineInstr &Inst,
4034  unsigned Opcode) const {
4035  MachineBasicBlock &MBB = *Inst.getParent();
4036  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4037 
4038  MachineOperand &Dest = Inst.getOperand(0);
4039  MachineOperand &Src0 = Inst.getOperand(1);
4040  MachineOperand &Src1 = Inst.getOperand(2);
4041  DebugLoc DL = Inst.getDebugLoc();
4042 
4043  MachineBasicBlock::iterator MII = Inst;
4044 
4045  const MCInstrDesc &InstDesc = get(Opcode);
4046  const TargetRegisterClass *Src0RC = Src0.isReg() ?
4047  MRI.getRegClass(Src0.getReg()) :
4048  &AMDGPU::SGPR_32RegClass;
4049 
4050  const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
4051  const TargetRegisterClass *Src1RC = Src1.isReg() ?
4052  MRI.getRegClass(Src1.getReg()) :
4053  &AMDGPU::SGPR_32RegClass;
4054 
4055  const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);
4056 
4057  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
4058  AMDGPU::sub0, Src0SubRC);
4059  MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
4060  AMDGPU::sub0, Src1SubRC);
4061 
4062  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
4063  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
4064  const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
4065 
4066  unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
4067  MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0)
4068  .add(SrcReg0Sub0)
4069  .add(SrcReg1Sub0);
4070 
4071  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
4072  AMDGPU::sub1, Src0SubRC);
4073  MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
4074  AMDGPU::sub1, Src1SubRC);
4075 
4076  unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
4077  MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1)
4078  .add(SrcReg0Sub1)
4079  .add(SrcReg1Sub1);
4080 
4081  unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
4082  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
4083  .addReg(DestSub0)
4084  .addImm(AMDGPU::sub0)
4085  .addReg(DestSub1)
4086  .addImm(AMDGPU::sub1);
4087 
4088  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
4089 
4090  // Try to legalize the operands in case we need to swap the order to keep it
4091  // valid.
4092  legalizeOperands(LoHalf);
4093  legalizeOperands(HiHalf);
4094 
4095  // Move all users of this moved value.
4096  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
4097 }
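// The split above relies on 64-bit AND/OR/XOR acting independently on each
// 32-bit half; a plain C++ restatement for the AND case:
#include <cstdint>

static uint64_t and64ViaHalves(uint64_t A, uint64_t B) {
  uint32_t Lo = static_cast<uint32_t>(A) & static_cast<uint32_t>(B);             // sub0 op
  uint32_t Hi = static_cast<uint32_t>(A >> 32) & static_cast<uint32_t>(B >> 32); // sub1 op
  return (static_cast<uint64_t>(Hi) << 32) | Lo; // REG_SEQUENCE of the two halves
}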
4098 
4099 void SIInstrInfo::splitScalar64BitBCNT(
4100  SetVectorType &Worklist, MachineInstr &Inst) const {
4101  MachineBasicBlock &MBB = *Inst.getParent();
4102  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4103 
4104  MachineBasicBlock::iterator MII = Inst;
4105  DebugLoc DL = Inst.getDebugLoc();
4106 
4107  MachineOperand &Dest = Inst.getOperand(0);
4108  MachineOperand &Src = Inst.getOperand(1);
4109 
4110  const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
4111  const TargetRegisterClass *SrcRC = Src.isReg() ?
4112  MRI.getRegClass(Src.getReg()) :
4113  &AMDGPU::SGPR_32RegClass;
4114 
4115  unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4116  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4117 
4118  const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0);
4119 
4120  MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
4121  AMDGPU::sub0, SrcSubRC);
4122  MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
4123  AMDGPU::sub1, SrcSubRC);
4124 
4125  BuildMI(MBB, MII, DL, InstDesc, MidReg).add(SrcRegSub0).addImm(0);
4126 
4127  BuildMI(MBB, MII, DL, InstDesc, ResultReg).add(SrcRegSub1).addReg(MidReg);
4128 
4129  MRI.replaceRegWith(Dest.getReg(), ResultReg);
4130 
4131  // We don't need to legalize operands here. src0 for either instruction can be
4132  // an SGPR, and the second input is unused or determined here.
4133  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
4134 }
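// The BCNT split above counts the low half first and lets the second
// V_BCNT_U32_B32 add the high-half count onto that partial sum; roughly:
#include <bitset>
#include <cstdint>

static uint32_t bcnt64ViaHalves(uint64_t X) {
  uint32_t Mid = std::bitset<32>(static_cast<uint32_t>(X)).count();     // sub0 count + 0
  return std::bitset<32>(static_cast<uint32_t>(X >> 32)).count() + Mid; // sub1 count + Mid
}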
4135 
4136 void SIInstrInfo::splitScalar64BitBFE(SetVectorType &Worklist,
4137  MachineInstr &Inst) const {
4138  MachineBasicBlock &MBB = *Inst.getParent();
4139  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4140  MachineBasicBlock::iterator MII = Inst;
4141  DebugLoc DL = Inst.getDebugLoc();
4142 
4143  MachineOperand &Dest = Inst.getOperand(0);
4144  uint32_t Imm = Inst.getOperand(2).getImm();
4145  uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
4146  uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
4147 
4148  (void) Offset;
4149 
4150  // Only sext_inreg cases handled.
4151  assert(Inst.getOpcode() == AMDGPU::S_BFE_I64 && BitWidth <= 32 &&
4152  Offset == 0 && "Not implemented");
4153 
4154  if (BitWidth < 32) {
4155  unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4156  unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4157  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
4158 
4159  BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo)
4160  .addReg(Inst.getOperand(1).getReg(), 0, AMDGPU::sub0)
4161  .addImm(0)
4162  .addImm(BitWidth);
4163 
4164  BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi)
4165  .addImm(31)
4166  .addReg(MidRegLo);
4167 
4168  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
4169  .addReg(MidRegLo)
4170  .addImm(AMDGPU::sub0)
4171  .addReg(MidRegHi)
4172  .addImm(AMDGPU::sub1);
4173 
4174  MRI.replaceRegWith(Dest.getReg(), ResultReg);
4175  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
4176  return;
4177  }
4178 
4179  MachineOperand &Src = Inst.getOperand(1);
4180  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4181  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
4182 
4183  BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg)
4184  .addImm(31)
4185  .addReg(Src.getReg(), 0, AMDGPU::sub0);
4186 
4187  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
4188  .addReg(Src.getReg(), 0, AMDGPU::sub0)
4189  .addImm(AMDGPU::sub0)
4190  .addReg(TmpReg)
4191  .addImm(AMDGPU::sub1);
4192 
4193  MRI.replaceRegWith(Dest.getReg(), ResultReg);
4194  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
4195 }
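// The only S_BFE_I64 form handled above is sign-extend-in-register with a zero
// offset: for widths below 32 the low dword is the field sign-extended to
// 32 bits (V_BFE_I32) and the high dword is that value shifted right
// arithmetically by 31. A sketch assuming two's-complement shifts:
#include <cstdint>

static int64_t sextInReg64(uint64_t Src, unsigned Width) { // 0 < Width < 32
  int32_t Lo = static_cast<int32_t>(static_cast<uint32_t>(Src) << (32 - Width)) >>
               (32 - Width);  // sign-extend the low Width bits
  int32_t Hi = Lo >> 31;      // replicate the sign bit across the high dword
  uint64_t Packed = (static_cast<uint64_t>(static_cast<uint32_t>(Hi)) << 32) |
                    static_cast<uint32_t>(Lo);
  return static_cast<int64_t>(Packed);
}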
4196 
4197 void SIInstrInfo::addUsersToMoveToVALUWorklist(
4198  unsigned DstReg,
4199  MachineRegisterInfo &MRI,
4200  SetVectorType &Worklist) const {
4201  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg),
4202  E = MRI.use_end(); I != E;) {
4203  MachineInstr &UseMI = *I->getParent();
4204  if (!canReadVGPR(UseMI, I.getOperandNo())) {
4205  Worklist.insert(&UseMI);
4206 
4207  do {
4208  ++I;
4209  } while (I != E && I->getParent() == &UseMI);
4210  } else {
4211  ++I;
4212  }
4213  }
4214 }
4215 
4216 void SIInstrInfo::movePackToVALU(SetVectorType &Worklist,
4217  MachineRegisterInfo &MRI,
4218  MachineInstr &Inst) const {
4219  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4220  MachineBasicBlock *MBB = Inst.getParent();
4221  MachineOperand &Src0 = Inst.getOperand(1);
4222  MachineOperand &Src1 = Inst.getOperand(2);
4223  const DebugLoc &DL = Inst.getDebugLoc();
4224 
4225  switch (Inst.getOpcode()) {
4226  case AMDGPU::S_PACK_LL_B32_B16: {
4227  unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4228  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4229 
4230  // FIXME: Can do a lot better if we know the high bits of src0 or src1 are
4231  // 0.
4232  BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
4233  .addImm(0xffff);
4234 
4235  BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_B32_e64), TmpReg)
4236  .addReg(ImmReg, RegState::Kill)
4237  .add(Src0);
4238 
4239  BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHL_OR_B32), ResultReg)
4240  .add(Src1)
4241  .addImm(16)
4242  .addReg(TmpReg, RegState::Kill);
4243  break;
4244  }
4245  case AMDGPU::S_PACK_LH_B32_B16: {
4246  unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4247  BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
4248  .addImm(0xffff);
4249  BuildMI(*MBB, Inst, DL, get(AMDGPU::V_BFI_B32), ResultReg)
4250  .addReg(ImmReg, RegState::Kill)
4251  .add(Src0)
4252  .add(Src1);
4253  break;
4254  }
4255  case AMDGPU::S_PACK_HH_B32_B16: {
4256  unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4257  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4258  BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHRREV_B32_e64), TmpReg)
4259  .addImm(16)
4260  .add(Src0);
4261  BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
4262  .addImm(0xffff0000);
4263  BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_OR_B32), ResultReg)
4264  .add(Src1)
4265  .addReg(ImmReg, RegState::Kill)
4266  .addReg(TmpReg, RegState::Kill);
4267  break;
4268  }
4269  default:
4270  llvm_unreachable("unhandled s_pack_* instruction");
4271  }
4272 
4273  MachineOperand &Dest = Inst.getOperand(0);
4274  MRI.replaceRegWith(Dest.getReg(), ResultReg);
4275  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
4276 }
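// Reference semantics for the three packs lowered above, as plain bit math:
// each result is a pair of 16-bit halves taken from the low (L) or high (H)
// half of the two 32-bit sources.
#include <cstdint>

static constexpr uint32_t packLL(uint32_t S0, uint32_t S1) {
  return (S1 << 16) | (S0 & 0xffffu);         // V_LSHL_OR over the masked src0
}
static constexpr uint32_t packLH(uint32_t S0, uint32_t S1) {
  return (S1 & 0xffff0000u) | (S0 & 0xffffu); // V_BFI_B32 with a 0xffff mask
}
static constexpr uint32_t packHH(uint32_t S0, uint32_t S1) {
  return (S1 & 0xffff0000u) | (S0 >> 16);     // V_AND_OR over the shifted src0
}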
4277 
4278 void SIInstrInfo::addSCCDefUsersToVALUWorklist(
4279  MachineInstr &SCCDefInst, SetVectorType &Worklist) const {
4280  // This assumes that all the users of SCC are in the same block
4281  // as the SCC def.
4282  for (MachineInstr &MI :
4283  make_range(MachineBasicBlock::iterator(SCCDefInst),
4284  SCCDefInst.getParent()->end())) {
4285  // Exit if we find another SCC def.
4286  if (MI.findRegisterDefOperandIdx(AMDGPU::SCC) != -1)
4287  return;
4288 
4289  if (MI.findRegisterUseOperandIdx(AMDGPU::SCC) != -1)
4290  Worklist.insert(&MI);
4291  }
4292 }
4293 
4294 const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
4295  const MachineInstr &Inst) const {
4296  const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0);
4297 
4298  switch (Inst.getOpcode()) {
4299  // For target instructions, getOpRegClass just returns the virtual register
4300  // class associated with the operand, so we need to find an equivalent VGPR
4301  // register class in order to move the instruction to the VALU.
4302  case AMDGPU::COPY:
4303  case AMDGPU::PHI:
4304  case AMDGPU::REG_SEQUENCE:
4305  case AMDGPU::INSERT_SUBREG:
4306  case AMDGPU::WQM:
4307  case AMDGPU::WWM:
4308  if (RI.hasVGPRs(NewDstRC))
4309  return nullptr;
4310 
4311  NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
4312  if (!NewDstRC)
4313  return nullptr;
4314  return NewDstRC;
4315  default:
4316  return NewDstRC;
4317  }
4318 }
4319 
4320 // Find the one SGPR operand we are allowed to use.
4321 unsigned SIInstrInfo::findUsedSGPR(const MachineInstr &MI,
4322  int OpIndices[3]) const {
4323  const MCInstrDesc &Desc = MI.getDesc();
4324 
4325  // Find the one SGPR operand we are allowed to use.
4326  //
4327  // First we need to consider the instruction's operand requirements before
4328  // legalizing. Some operands are required to be SGPRs, such as implicit uses
4329  // of VCC, but we are still bound by the constant bus requirement to only use
4330  // one.
4331  //
4332  // If the operand's class is an SGPR, we can never move it.
4333 
4334  unsigned SGPRReg = findImplicitSGPRRead(MI);
4335  if (SGPRReg != AMDGPU::NoRegister)
4336  return SGPRReg;
4337 
4338  unsigned UsedSGPRs[3] = { AMDGPU::NoRegister };
4339  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4340 
4341  for (unsigned i = 0; i < 3; ++i) {
4342  int Idx = OpIndices[i];
4343  if (Idx == -1)
4344  break;
4345 
4346  const MachineOperand &MO = MI.getOperand(Idx);
4347  if (!MO.isReg())
4348  continue;
4349 
4350  // Is this operand statically required to be an SGPR based on the operand
4351  // constraints?
4352  const TargetRegisterClass *OpRC = RI.getRegClass(Desc.OpInfo[Idx].RegClass);
4353  bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
4354  if (IsRequiredSGPR)
4355  return MO.getReg();
4356 
4357  // If this could be a VGPR or an SGPR, Check the dynamic register class.
4358  unsigned Reg = MO.getReg();
4359  const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
4360  if (RI.isSGPRClass(RegRC))
4361  UsedSGPRs[i] = Reg;
4362  }
4363 
4364  // We don't have a required SGPR operand, so we have a bit more freedom in
4365  // selecting operands to move.
4366 
4367  // Try to select the most used SGPR. If an SGPR is equal to one of the
4368  // others, we choose that.
4369  //
4370  // e.g.
4371  // V_FMA_F32 v0, s0, s0, s0 -> No moves
4372  // V_FMA_F32 v0, s0, s1, s0 -> Move s1
4373 
4374  // TODO: If some of the operands are 64-bit SGPRs and some 32, we should
4375  // prefer those.
4376 
4377  if (UsedSGPRs[0] != AMDGPU::NoRegister) {
4378  if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
4379  SGPRReg = UsedSGPRs[0];
4380  }
4381 
4382  if (SGPRReg == AMDGPU::NoRegister && UsedSGPRs[1] != AMDGPU::NoRegister) {
4383  if (UsedSGPRs[1] == UsedSGPRs[2])
4384  SGPRReg = UsedSGPRs[1];
4385  }
4386 
4387  return SGPRReg;
4388 }
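// A simplified model of the selection above: among up to three source
// registers, prefer an SGPR that several operands share, so only the remaining
// operands need copies to VGPRs. kNoReg stands in for AMDGPU::NoRegister;
// pickMostUsedSGPR is illustrative, not an LLVM helper.
static constexpr unsigned kNoReg = 0;

static unsigned pickMostUsedSGPR(const unsigned UsedSGPRs[3]) {
  if (UsedSGPRs[0] != kNoReg &&
      (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2]))
    return UsedSGPRs[0];
  if (UsedSGPRs[1] != kNoReg && UsedSGPRs[1] == UsedSGPRs[2])
    return UsedSGPRs[1];
  return kNoReg;
}
// e.g. for V_FMA_F32 v0, s0, s1, s0 the array {s0, s1, s0} selects s0, so only
// s1 has to be copied into a VGPR.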
4389 
4390 MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
4391  unsigned OperandName) const {
4392  int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
4393  if (Idx == -1)
4394  return nullptr;
4395 
4396  return &MI.getOperand(Idx);
4397 }
4398 
4399 uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
4400  uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
4401  if (ST.isAmdHsaOS()) {
4402  // Set ATC = 1. GFX9 doesn't have this bit.
4403  if (ST.getGeneration() <= SISubtarget::VOLCANIC_ISLANDS)
4404  RsrcDataFormat |= (1ULL << 56);
4405 
4406  // Set MTYPE = 2 (MTYPE_UC = uncached). GFX9 doesn't have this.
4407  // BTW, it disables TC L2 and therefore decreases performance.
4408  if (ST.getGeneration() == SISubtarget::VOLCANIC_ISLANDS)
4409  RsrcDataFormat |= (2ULL << 59);
4410  }
4411 
4412  return RsrcDataFormat;
4413 }
4414 
4415 uint64_t SIInstrInfo::getScratchRsrcWords23() const {
4416  uint64_t Rsrc23 = getDefaultRsrcDataFormat() |
4417  AMDGPU::RSRC_TID_ENABLE |
4418  0xffffffff; // Size;
4419 
4420  // GFX9 doesn't have ELEMENT_SIZE.
4421  if (ST.getGeneration() <= SISubtarget::VOLCANIC_ISLANDS) {
4422  uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1;
4423  Rsrc23 |= EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT;
4424  }
4425 
4426  // IndexStride = 64.
4427  Rsrc23 |= UINT64_C(3) << AMDGPU::RSRC_INDEX_STRIDE_SHIFT;
4428 
4429  // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
4430  // Clear them unless we want a huge stride.
4431  if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
4432  Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
4433 
4434  return Rsrc23;
4435 }
4436 
4437 bool SIInstrInfo::isLowLatencyInstruction(const MachineInstr &MI) const {
4438  unsigned Opc = MI.getOpcode();
4439 
4440  return isSMRD(Opc);
4441 }
4442 
4443 bool SIInstrInfo::isHighLatencyInstruction(const MachineInstr &MI) const {
4444  unsigned Opc = MI.getOpcode();
4445 
4446  return isMUBUF(Opc) || isMTBUF(Opc) || isMIMG(Opc);
4447 }
4448 
4449 unsigned SIInstrInfo::isStackAccess(const MachineInstr &MI,
4450  int &FrameIndex) const {
4451  const MachineOperand *Addr = getNamedOperand(MI, AMDGPU::OpName::vaddr);
4452  if (!Addr || !Addr->isFI())
4453  return AMDGPU::NoRegister;
4454 
4455  assert(!MI.memoperands_empty() &&
4456  (*MI.memoperands_begin())->getAddrSpace() == AMDGPUASI.PRIVATE_ADDRESS);
4457 
4458  FrameIndex = Addr->getIndex();
4459  return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
4460 }
4461 
4462 unsigned SIInstrInfo::isSGPRStackAccess(const MachineInstr &MI,
4463  int &FrameIndex) const {
4464  const MachineOperand *Addr = getNamedOperand(MI, AMDGPU::OpName::addr);
4465  assert(Addr && Addr->isFI());
4466  FrameIndex = Addr->getIndex();
4467  return getNamedOperand(MI, AMDGPU::OpName::data)->getReg();
4468 }
4469 
4470 unsigned SIInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
4471  int &FrameIndex) const {
4472  if (!MI.mayLoad())
4473  return AMDGPU::NoRegister;
4474 
4475  if (isMUBUF(MI) || isVGPRSpill(MI))
4476  return isStackAccess(MI, FrameIndex);
4477 
4478  if (isSGPRSpill(MI))
4479  return isSGPRStackAccess(MI, FrameIndex);
4480 
4481  return AMDGPU::NoRegister;
4482 }
4483 
4484 unsigned SIInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
4485  int &FrameIndex) const {
4486  if (!MI.mayStore())
4487  return AMDGPU::NoRegister;
4488 
4489  if (isMUBUF(MI) || isVGPRSpill(MI))
4490  return isStackAccess(MI, FrameIndex);
4491 
4492  if (isSGPRSpill(MI))
4493  return isSGPRStackAccess(MI, FrameIndex);
4494 
4495  return AMDGPU::NoRegister;
4496 }
4497 
4498 unsigned SIInstrInfo::getInstBundleSize(const MachineInstr &MI) const {
4499  unsigned Size = 0;
4500  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
4501  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4502  while (++I != E && I->isInsideBundle()) {
4503  assert(!I->isBundle() && "No nested bundle!");
4504  Size += getInstSizeInBytes(*I);
4505  }
4506 
4507  return Size;
4508 }
4509 
4510 unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
4511  unsigned Opc = MI.getOpcode();
4512  const MCInstrDesc &Desc = getMCOpcodeFromPseudo(Opc);
4513  unsigned DescSize = Desc.getSize();
4514 
4515  // If we have a definitive size, we can use it. Otherwise we need to inspect
4516  // the operands to know the size.
4517  //
4518  // FIXME: Instructions that have a base 32-bit encoding report their size as
4519  // 4, even though they are really 8 bytes if they have a literal operand.
4520  if (DescSize != 0 && DescSize != 4)
4521  return DescSize;
4522 
4523  // 4-byte instructions may have a 32-bit literal encoded after them. Check
4524  // operands that could ever be literals.
4525  if (isVALU(MI) || isSALU(MI)) {
4526  if (isFixedSize(MI))
4527  return DescSize;
4528 
4529  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4530  if (Src0Idx == -1)
4531  return 4; // No operands.
4532 
4533  if (isLiteralConstantLike(MI.getOperand(Src0Idx), Desc.OpInfo[Src0Idx]))
4534  return 8;
4535 
4536  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4537  if (Src1Idx == -1)
4538  return 4;
4539 
4540  if (isLiteralConstantLike(MI.getOperand(Src1Idx), Desc.OpInfo[Src1Idx]))
4541  return 8;
4542 
4543  return 4;
4544  }
4545 
4546  if (DescSize == 4)
4547  return 4;
4548 
4549  switch (Opc) {
4550  case TargetOpcode::IMPLICIT_DEF:
4551  case TargetOpcode::KILL:
4552  case TargetOpcode::DBG_VALUE:
4553  case TargetOpcode::EH_LABEL:
4554  return 0;
4555  case TargetOpcode::BUNDLE:
4556  return getInstBundleSize(MI);
4557  case TargetOpcode::INLINEASM: {
4558  const MachineFunction *MF = MI.getParent()->getParent();
4559  const char *AsmStr = MI.getOperand(0).getSymbolName();
4560  return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
4561  }
4562  default:
4563  llvm_unreachable("unable to find instruction size");
4564  }
4565 }
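// A condensed model of the VALU/SALU rule above: the 4-byte base encoding
// grows to 8 bytes when either of the first two sources needs a 32-bit
// literal. sizeOfBase4Encoding is illustrative only.
static unsigned sizeOfBase4Encoding(bool Src0IsLiteral, bool Src1IsLiteral) {
  return (Src0IsLiteral || Src1IsLiteral) ? 8 : 4;
}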
4566 
4567 bool SIInstrInfo::mayAccessFlatAddressSpace(const MachineInstr &MI) const {
4568  if (!isFLAT(MI))
4569  return false;
4570 
4571  if (MI.memoperands_empty())
4572  return true;
4573 
4574  for (const MachineMemOperand *MMO : MI.memoperands()) {
4575  if (MMO->getAddrSpace() == AMDGPUASI.FLAT_ADDRESS)
4576  return true;
4577  }
4578  return false;
4579 }
4580 
4581 bool SIInstrInfo::isNonUniformBranchInstr(MachineInstr &Branch) const {
4582  return Branch.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO;
4583 }
4584 
4585 void SIInstrInfo::convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
4586  MachineBasicBlock *IfEnd) const {
4587  MachineBasicBlock::iterator TI = IfEntry->getFirstTerminator();
4588  assert(TI != IfEntry->end());
4589 
4590  MachineInstr *Branch = &(*TI);
4591  MachineFunction *MF = IfEntry->getParent();
4592  MachineRegisterInfo &MRI = IfEntry->getParent()->getRegInfo();
4593 
4594  if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
4595  unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
4596  MachineInstr *SIIF =
4597  BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_IF), DstReg)
4598  .add(Branch->getOperand(0))
4599  .add(Branch->getOperand(1));
4600  MachineInstr *SIEND =
4601  BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_END_CF))
4602  .addReg(DstReg);
4603 
4604  IfEntry->erase(TI);
4605  IfEntry->insert(IfEntry->end(), SIIF);
4606  IfEnd->insert(IfEnd->getFirstNonPHI(), SIEND);
4607  }
4608 }
4609 
4610 void SIInstrInfo::convertNonUniformLoopRegion(
4611  MachineBasicBlock *LoopEntry, MachineBasicBlock *LoopEnd) const {
4612  MachineBasicBlock::iterator TI = LoopEnd->getFirstTerminator();
4613  // We expect 2 terminators, one conditional and one unconditional.
4614  assert(TI != LoopEnd->end());
4615 
4616  MachineInstr *Branch = &(*TI);
4617  MachineFunction *MF = LoopEnd->getParent();
4618  MachineRegisterInfo &MRI = LoopEnd->getParent()->getRegInfo();
4619 
4620  if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
4621 
4622  unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
4623  unsigned BackEdgeReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
4624  MachineInstrBuilder HeaderPHIBuilder =
4625  BuildMI(*(MF), Branch->getDebugLoc(), get(TargetOpcode::PHI), DstReg);
4626  for (MachineBasicBlock::pred_iterator PI = LoopEntry->pred_begin(),
4627  E = LoopEntry->pred_end();
4628  PI != E; ++PI) {
4629  if (*PI == LoopEnd) {
4630  HeaderPHIBuilder.addReg(BackEdgeReg);
4631  } else {
4632  MachineBasicBlock *PMBB = *PI;
4633  unsigned ZeroReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
4634  materializeImmediate(*PMBB, PMBB->getFirstTerminator(), DebugLoc(),
4635  ZeroReg, 0);
4636  HeaderPHIBuilder.addReg(ZeroReg);
4637  }
4638  HeaderPHIBuilder.addMBB(*PI);
4639  }
4640  MachineInstr *HeaderPhi = HeaderPHIBuilder;
4641  MachineInstr *SIIFBREAK = BuildMI(*(MF), Branch->getDebugLoc(),
4642  get(AMDGPU::SI_IF_BREAK), BackEdgeReg)
4643  .addReg(DstReg)
4644  .add(Branch->getOperand(0));
4645  MachineInstr *SILOOP =
4646  BuildMI(*(MF), Branch->getDebugLoc(), get(AMDGPU::SI_LOOP))
4647  .addReg(BackEdgeReg)
4648  .addMBB(LoopEntry);
4649 
4650  LoopEntry->insert(LoopEntry->begin(), HeaderPhi);
4651  LoopEnd->erase(TI);
4652  LoopEnd->insert(LoopEnd->end(), SIIFBREAK);
4653  LoopEnd->insert(LoopEnd->end(), SILOOP);
4654  }
4655 }
4656 
4657 ArrayRef<std::pair<int, const char *>>
4658 SIInstrInfo::getSerializableTargetIndices() const {
4659  static const std::pair<int, const char *> TargetIndices[] = {
4660  {AMDGPU::TI_CONSTDATA_START, "amdgpu-constdata-start"},
4661  {AMDGPU::TI_SCRATCH_RSRC_DWORD0, "amdgpu-scratch-rsrc-dword0"},
4662  {AMDGPU::TI_SCRATCH_RSRC_DWORD1, "amdgpu-scratch-rsrc-dword1"},
4663  {AMDGPU::TI_SCRATCH_RSRC_DWORD2, "amdgpu-scratch-rsrc-dword2"},
4664  {AMDGPU::TI_SCRATCH_RSRC_DWORD3, "amdgpu-scratch-rsrc-dword3"}};
4665  return makeArrayRef(TargetIndices);
4666 }
4667 
4668 /// This is used by the post-RA scheduler (SchedulePostRAList.cpp). The
4669 /// post-RA version of misched uses CreateTargetMIHazardRecognizer.
4670 ScheduleHazardRecognizer *
4671 SIInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
4672  const ScheduleDAG *DAG) const {
4673  return new GCNHazardRecognizer(DAG->MF);
4674 }
4675 
4676 /// This is the hazard recognizer used at -O0 by the PostRAHazardRecognizer
4677 /// pass.
4678 ScheduleHazardRecognizer *
4679 SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const {
4680  return new GCNHazardRecognizer(MF);
4681 }
4682 
4683 std::pair<unsigned, unsigned>
4684 SIInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
4685  return std::make_pair(TF & MO_MASK, TF & ~MO_MASK);
4686 }
4687 
4688 ArrayRef<std::pair<unsigned, const char *>>
4689 SIInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
4690  static const std::pair<unsigned, const char *> TargetFlags[] = {
4691  { MO_GOTPCREL, "amdgpu-gotprel" },
4692  { MO_GOTPCREL32_LO, "amdgpu-gotprel32-lo" },
4693  { MO_GOTPCREL32_HI, "amdgpu-gotprel32-hi" },
4694  { MO_REL32_LO, "amdgpu-rel32-lo" },
4695  { MO_REL32_HI, "amdgpu-rel32-hi" }
4696  };
4697 
4698  return makeArrayRef(TargetFlags);
4699 }
4700 
4701 bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const {
4702  return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY &&
4703  MI.modifiesRegister(AMDGPU::EXEC, &RI);
4704 }
4705 
4706 MachineInstrBuilder
4707 SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
4708  MachineBasicBlock::iterator I,
4709  const DebugLoc &DL,
4710  unsigned DestReg) const {
4711  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4712 
4713  unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
4714 
4715  return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg)
4716  .addReg(UnusedCarry, RegState::Define | RegState::Dead);
4717 }
4718 
4719 bool SIInstrInfo::isKillTerminator(unsigned Opcode) {
4720  switch (Opcode) {
4721  case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
4722  case AMDGPU::SI_KILL_I1_TERMINATOR:
4723  return true;
4724  default:
4725  return false;
4726  }
4727 }
4728 
4729 const MCInstrDesc &SIInstrInfo::getKillTerminatorFromPseudo(unsigned Opcode) const {
4730  switch (Opcode) {
4731  case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
4732  return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
4733  case AMDGPU::SI_KILL_I1_PSEUDO:
4734  return get(AMDGPU::SI_KILL_I1_TERMINATOR);
4735  default:
4736  llvm_unreachable("invalid opcode, expected SI_KILL_*_PSEUDO");
4737  }
4738 }