SIInstrInfo.cpp
1 //===- SIInstrInfo.cpp - SI Instruction Information ----------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief SI Implementation of TargetInstrInfo.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "SIInstrInfo.h"
16 #include "AMDGPU.h"
17 #include "AMDGPUSubtarget.h"
18 #include "GCNHazardRecognizer.h"
19 #include "SIDefines.h"
20 #include "SIMachineFunctionInfo.h"
21 #include "SIRegisterInfo.h"
22 #include "Utils/AMDGPUBaseInfo.h"
23 #include "llvm/ADT/APInt.h"
24 #include "llvm/ADT/ArrayRef.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/StringRef.h"
46 #include "llvm/IR/DebugLoc.h"
47 #include "llvm/IR/DiagnosticInfo.h"
48 #include "llvm/IR/Function.h"
49 #include "llvm/IR/InlineAsm.h"
50 #include "llvm/IR/LLVMContext.h"
51 #include "llvm/MC/MCInstrDesc.h"
52 #include "llvm/Support/Casting.h"
54 #include "llvm/Support/Compiler.h"
58 #include <cassert>
59 #include <cstdint>
60 #include <iterator>
61 #include <utility>
62 
63 using namespace llvm;
64 
65 // Must be at least 4 to be able to branch over minimum unconditional branch
66 // code. This is only for making it possible to write reasonably small tests for
67 // long branches.
68 static cl::opt<unsigned>
69 BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),
70  cl::desc("Restrict range of branch instructions (DEBUG)"));
71 
72 SIInstrInfo::SIInstrInfo(const SISubtarget &ST)
73  : AMDGPUInstrInfo(ST), RI(ST), ST(ST) {}
74 
75 //===----------------------------------------------------------------------===//
76 // TargetInstrInfo callbacks
77 //===----------------------------------------------------------------------===//
78 
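// Count the operands of \p Node, ignoring any trailing glue operands.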
79 static unsigned getNumOperandsNoGlue(SDNode *Node) {
80  unsigned N = Node->getNumOperands();
81  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
82  --N;
83  return N;
84 }
85 
86 static SDValue findChainOperand(SDNode *Load) {
87  SDValue LastOp = Load->getOperand(getNumOperandsNoGlue(Load) - 1);
88  assert(LastOp.getValueType() == MVT::Other && "Chain missing from load node");
89  return LastOp;
90 }
91 
92 /// \brief Returns true if both nodes have the same value for the given
93 /// operand \p Op, or if both nodes do not have this operand.
94 static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) {
95  unsigned Opc0 = N0->getMachineOpcode();
96  unsigned Opc1 = N1->getMachineOpcode();
97 
98  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
99  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);
100 
101  if (Op0Idx == -1 && Op1Idx == -1)
102  return true;
103 
104 
105  if ((Op0Idx == -1 && Op1Idx != -1) ||
106  (Op1Idx == -1 && Op0Idx != -1))
107  return false;
108 
109  // getNamedOperandIdx returns the index for the MachineInstr's operands,
110  // which includes the result as the first operand. We are indexing into the
111  // MachineSDNode's operands, so we need to skip the result operand to get
112  // the real index.
113  --Op0Idx;
114  --Op1Idx;
115 
116  return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx);
117 }
118 
119 bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
120  AliasAnalysis *AA) const {
121  // TODO: The generic check fails for VALU instructions that should be
122  // rematerializable due to implicit reads of exec. We really want all of the
123  // generic logic for this except for that check.
124  switch (MI.getOpcode()) {
125  case AMDGPU::V_MOV_B32_e32:
126  case AMDGPU::V_MOV_B32_e64:
127  case AMDGPU::V_MOV_B64_PSEUDO:
128  return true;
129  default:
130  return false;
131  }
132 }
133 
134 bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
135  int64_t &Offset0,
136  int64_t &Offset1) const {
137  if (!Load0->isMachineOpcode() || !Load1->isMachineOpcode())
138  return false;
139 
140  unsigned Opc0 = Load0->getMachineOpcode();
141  unsigned Opc1 = Load1->getMachineOpcode();
142 
143  // Make sure both are actually loads.
144  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())
145  return false;
146 
147  if (isDS(Opc0) && isDS(Opc1)) {
148 
149  // FIXME: Handle this case:
150  if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1))
151  return false;
152 
153  // Check base reg.
154  if (Load0->getOperand(1) != Load1->getOperand(1))
155  return false;
156 
157  // Check chain.
158  if (findChainOperand(Load0) != findChainOperand(Load1))
159  return false;
160 
161  // Skip read2 / write2 variants for simplicity.
162  // TODO: We should report true if the used offsets are adjacent (excluding
163  // the st64 versions).
164  if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::data1) != -1 ||
165  AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::data1) != -1)
166  return false;
167 
168  Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue();
169  Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue();
170  return true;
171  }
172 
173  if (isSMRD(Opc0) && isSMRD(Opc1)) {
174  // Skip time and cache invalidation instructions.
175  if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::sbase) == -1 ||
176  AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::sbase) == -1)
177  return false;
178 
180 
181  // Check base reg.
182  if (Load0->getOperand(0) != Load1->getOperand(0))
183  return false;
184 
185  const ConstantSDNode *Load0Offset =
186  dyn_cast<ConstantSDNode>(Load0->getOperand(1));
187  const ConstantSDNode *Load1Offset =
188  dyn_cast<ConstantSDNode>(Load1->getOperand(1));
189 
190  if (!Load0Offset || !Load1Offset)
191  return false;
192 
193  // Check chain.
194  if (findChainOperand(Load0) != findChainOperand(Load1))
195  return false;
196 
197  Offset0 = Load0Offset->getZExtValue();
198  Offset1 = Load1Offset->getZExtValue();
199  return true;
200  }
201 
202  // MUBUF and MTBUF can access the same addresses.
203  if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) {
204 
205  // MUBUF and MTBUF have vaddr at different indices.
206  if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) ||
207  findChainOperand(Load0) != findChainOperand(Load1) ||
208  !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) ||
209  !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc))
210  return false;
211 
212  int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
213  int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
214 
215  if (OffIdx0 == -1 || OffIdx1 == -1)
216  return false;
217 
218  // getNamedOperandIdx returns the index for MachineInstrs. Since they
219  // include the output in the operand list, but SDNodes don't, we need to
220  // subtract the index by one.
221  --OffIdx0;
222  --OffIdx1;
223 
224  SDValue Off0 = Load0->getOperand(OffIdx0);
225  SDValue Off1 = Load1->getOperand(OffIdx1);
226 
227  // The offset might be a FrameIndexSDNode.
228  if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
229  return false;
230 
231  Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue();
232  Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue();
233  return true;
234  }
235 
236  return false;
237 }
238 
239 static bool isStride64(unsigned Opc) {
240  switch (Opc) {
241  case AMDGPU::DS_READ2ST64_B32:
242  case AMDGPU::DS_READ2ST64_B64:
243  case AMDGPU::DS_WRITE2ST64_B32:
244  case AMDGPU::DS_WRITE2ST64_B64:
245  return true;
246  default:
247  return false;
248  }
249 }
250 
251 bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
252  int64_t &Offset,
253  const TargetRegisterInfo *TRI) const {
254  unsigned Opc = LdSt.getOpcode();
255 
256  if (isDS(LdSt)) {
257  const MachineOperand *OffsetImm =
258  getNamedOperand(LdSt, AMDGPU::OpName::offset);
259  if (OffsetImm) {
260  // Normal, single offset LDS instruction.
261  const MachineOperand *AddrReg =
262  getNamedOperand(LdSt, AMDGPU::OpName::addr);
263 
264  BaseReg = AddrReg->getReg();
265  Offset = OffsetImm->getImm();
266  return true;
267  }
268 
269  // The 2 offset instructions use offset0 and offset1 instead. We can treat
270  // these as a load with a single offset if the 2 offsets are consecutive. We
271  // will use this for some partially aligned loads.
272  const MachineOperand *Offset0Imm =
273  getNamedOperand(LdSt, AMDGPU::OpName::offset0);
274  const MachineOperand *Offset1Imm =
275  getNamedOperand(LdSt, AMDGPU::OpName::offset1);
276 
277  uint8_t Offset0 = Offset0Imm->getImm();
278  uint8_t Offset1 = Offset1Imm->getImm();
279 
280  if (Offset1 > Offset0 && Offset1 - Offset0 == 1) {
281  // Each of these offsets is in element sized units, so we need to convert
282  // to bytes of the individual reads.
283 
284  unsigned EltSize;
285  if (LdSt.mayLoad())
286  EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, 0)) / 16;
287  else {
288  assert(LdSt.mayStore());
289  int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
290  EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, Data0Idx)) / 8;
291  }
292 
293  if (isStride64(Opc))
294  EltSize *= 64;
295 
296  const MachineOperand *AddrReg =
297  getNamedOperand(LdSt, AMDGPU::OpName::addr);
298  BaseReg = AddrReg->getReg();
299  Offset = EltSize * Offset0;
300  return true;
301  }
302 
303  return false;
304  }
305 
306  if (isMUBUF(LdSt) || isMTBUF(LdSt)) {
307  const MachineOperand *SOffset = getNamedOperand(LdSt, AMDGPU::OpName::soffset);
308  if (SOffset && SOffset->isReg())
309  return false;
310 
311  const MachineOperand *AddrReg =
312  getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
313  if (!AddrReg)
314  return false;
315 
316  const MachineOperand *OffsetImm =
317  getNamedOperand(LdSt, AMDGPU::OpName::offset);
318  BaseReg = AddrReg->getReg();
319  Offset = OffsetImm->getImm();
320 
321  if (SOffset) // soffset can be an inline immediate.
322  Offset += SOffset->getImm();
323 
324  return true;
325  }
326 
327  if (isSMRD(LdSt)) {
328  const MachineOperand *OffsetImm =
329  getNamedOperand(LdSt, AMDGPU::OpName::offset);
330  if (!OffsetImm)
331  return false;
332 
333  const MachineOperand *SBaseReg =
334  getNamedOperand(LdSt, AMDGPU::OpName::sbase);
335  BaseReg = SBaseReg->getReg();
336  Offset = OffsetImm->getImm();
337  return true;
338  }
339 
340  if (isFLAT(LdSt)) {
341  const MachineOperand *VAddr = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
342  if (VAddr) {
343  // Can't analyze 2 offsets.
344  if (getNamedOperand(LdSt, AMDGPU::OpName::saddr))
345  return false;
346 
347  BaseReg = VAddr->getReg();
348  } else {
349  // scratch instructions have either vaddr or saddr.
350  BaseReg = getNamedOperand(LdSt, AMDGPU::OpName::saddr)->getReg();
351  }
352 
353  Offset = getNamedOperand(LdSt, AMDGPU::OpName::offset)->getImm();
354  return true;
355  }
356 
357  return false;
358 }
359 
360 static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, unsigned BaseReg1,
361  const MachineInstr &MI2, unsigned BaseReg2) {
362  if (BaseReg1 == BaseReg2)
363  return true;
364 
365  if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand())
366  return false;
367 
368  auto MO1 = *MI1.memoperands_begin();
369  auto MO2 = *MI2.memoperands_begin();
370  if (MO1->getAddrSpace() != MO2->getAddrSpace())
371  return false;
372 
373  auto Base1 = MO1->getValue();
374  auto Base2 = MO2->getValue();
375  if (!Base1 || !Base2)
376  return false;
377  const MachineFunction &MF = *MI1.getParent()->getParent();
378  const DataLayout &DL = MF.getFunction().getParent()->getDataLayout();
379  Base1 = GetUnderlyingObject(Base1, DL);
380  Base2 = GetUnderlyingObject(Base2, DL);
381 
382  if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))
383  return false;
384 
385  return Base1 == Base2;
386 }
387 
388 bool SIInstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
389  unsigned BaseReg1,
390  MachineInstr &SecondLdSt,
391  unsigned BaseReg2,
392  unsigned NumLoads) const {
393  if (!memOpsHaveSameBasePtr(FirstLdSt, BaseReg1, SecondLdSt, BaseReg2))
394  return false;
395 
396  const MachineOperand *FirstDst = nullptr;
397  const MachineOperand *SecondDst = nullptr;
398 
399  if ((isMUBUF(FirstLdSt) && isMUBUF(SecondLdSt)) ||
400  (isMTBUF(FirstLdSt) && isMTBUF(SecondLdSt)) ||
401  (isFLAT(FirstLdSt) && isFLAT(SecondLdSt))) {
402  const unsigned MaxGlobalLoadCluster = 6;
403  if (NumLoads > MaxGlobalLoadCluster)
404  return false;
405 
406  FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdata);
407  if (!FirstDst)
408  FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdst);
409  SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdata);
410  if (!SecondDst)
411  SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdst);
412  } else if (isSMRD(FirstLdSt) && isSMRD(SecondLdSt)) {
413  FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::sdst);
414  SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::sdst);
415  } else if (isDS(FirstLdSt) && isDS(SecondLdSt)) {
416  FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdst);
417  SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdst);
418  }
419 
420  if (!FirstDst || !SecondDst)
421  return false;
422 
423  // Try to limit clustering based on the total number of bytes loaded
424  // rather than the number of instructions. This is done to help reduce
425  // register pressure. The method used is somewhat inexact, though,
426  // because it assumes that all loads in the cluster will load the
427  // same number of bytes as FirstLdSt.
428 
429  // The unit of this value is bytes.
430  // FIXME: This needs finer tuning.
431  unsigned LoadClusterThreshold = 16;
432 
433  const MachineRegisterInfo &MRI =
434  FirstLdSt.getParent()->getParent()->getRegInfo();
435  const TargetRegisterClass *DstRC = MRI.getRegClass(FirstDst->getReg());
436 
437  return (NumLoads * (RI.getRegSizeInBits(*DstRC) / 8)) <= LoadClusterThreshold;
438 }
439 
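// Called when copyPhysReg is asked for a copy it cannot lower (e.g. a VGPR
// source into an SGPR destination): emit a diagnostic and an SI_ILLEGAL_COPY
// placeholder instead.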
440 static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB,
441  MachineBasicBlock::iterator MI,
442  const DebugLoc &DL, unsigned DestReg,
443  unsigned SrcReg, bool KillSrc) {
444  MachineFunction *MF = MBB.getParent();
445  DiagnosticInfoUnsupported IllegalCopy(MF->getFunction(),
446  "illegal SGPR to VGPR copy",
447  DL, DS_Error);
448  LLVMContext &C = MF->getFunction().getContext();
449  C.diagnose(IllegalCopy);
450 
451  BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_ILLEGAL_COPY), DestReg)
452  .addReg(SrcReg, getKillRegState(KillSrc));
453 }
454 
455 void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
456  MachineBasicBlock::iterator MI,
457  const DebugLoc &DL, unsigned DestReg,
458  unsigned SrcReg, bool KillSrc) const {
459  const TargetRegisterClass *RC = RI.getPhysRegClass(DestReg);
460 
461  if (RC == &AMDGPU::VGPR_32RegClass) {
462  assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
463  AMDGPU::SReg_32RegClass.contains(SrcReg));
464  BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
465  .addReg(SrcReg, getKillRegState(KillSrc));
466  return;
467  }
468 
469  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
470  RC == &AMDGPU::SReg_32RegClass) {
471  if (SrcReg == AMDGPU::SCC) {
472  BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B32), DestReg)
473  .addImm(-1)
474  .addImm(0);
475  return;
476  }
477 
478  if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
479  reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
480  return;
481  }
482 
483  BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
484  .addReg(SrcReg, getKillRegState(KillSrc));
485  return;
486  }
487 
488  if (RC == &AMDGPU::SReg_64RegClass) {
489  if (DestReg == AMDGPU::VCC) {
490  if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
491  BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
492  .addReg(SrcReg, getKillRegState(KillSrc));
493  } else {
494  // FIXME: Hack until VReg_1 removed.
495  assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
496  BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32))
497  .addImm(0)
498  .addReg(SrcReg, getKillRegState(KillSrc));
499  }
500 
501  return;
502  }
503 
504  if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) {
505  reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
506  return;
507  }
508 
509  BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
510  .addReg(SrcReg, getKillRegState(KillSrc));
511  return;
512  }
513 
514  if (DestReg == AMDGPU::SCC) {
515  assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
516  BuildMI(MBB, MI, DL, get(AMDGPU::S_CMP_LG_U32))
517  .addReg(SrcReg, getKillRegState(KillSrc))
518  .addImm(0);
519  return;
520  }
521 
522  unsigned EltSize = 4;
523  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
524  if (RI.isSGPRClass(RC)) {
525  if (RI.getRegSizeInBits(*RC) > 32) {
526  Opcode = AMDGPU::S_MOV_B64;
527  EltSize = 8;
528  } else {
529  Opcode = AMDGPU::S_MOV_B32;
530  EltSize = 4;
531  }
532 
533  if (!RI.isSGPRClass(RI.getPhysRegClass(SrcReg))) {
534  reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
535  return;
536  }
537  }
538 
539  ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RC, EltSize);
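 // When the source and destination register tuples overlap, choose a
 // sub-register copy order that reads each source lane before it is
 // overwritten.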
540  bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
541 
542  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
543  unsigned SubIdx;
544  if (Forward)
545  SubIdx = SubIndices[Idx];
546  else
547  SubIdx = SubIndices[SubIndices.size() - Idx - 1];
548 
549  MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
550  get(Opcode), RI.getSubReg(DestReg, SubIdx));
551 
552  Builder.addReg(RI.getSubReg(SrcReg, SubIdx));
553 
554  if (Idx == 0)
555  Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
556 
557  bool UseKill = KillSrc && Idx == SubIndices.size() - 1;
558  Builder.addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit);
559  }
560 }
561 
562 int SIInstrInfo::commuteOpcode(unsigned Opcode) const {
563  int NewOpc;
564 
565  // Try to map original to commuted opcode
566  NewOpc = AMDGPU::getCommuteRev(Opcode);
567  if (NewOpc != -1)
568  // Check if the commuted (REV) opcode exists on the target.
569  return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;
570 
571  // Try to map commuted to original opcode
572  NewOpc = AMDGPU::getCommuteOrig(Opcode);
573  if (NewOpc != -1)
574  // Check if the original (non-REV) opcode exists on the target.
575  return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;
576 
577  return Opcode;
578 }
579 
580 void SIInstrInfo::materializeImmediate(MachineBasicBlock &MBB,
581  MachineBasicBlock::iterator MI,
582  const DebugLoc &DL, unsigned DestReg,
583  int64_t Value) const {
584  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
585  const TargetRegisterClass *RegClass = MRI.getRegClass(DestReg);
586  if (RegClass == &AMDGPU::SReg_32RegClass ||
587  RegClass == &AMDGPU::SGPR_32RegClass ||
588  RegClass == &AMDGPU::SReg_32_XM0RegClass ||
589  RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) {
590  BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
591  .addImm(Value);
592  return;
593  }
594 
595  if (RegClass == &AMDGPU::SReg_64RegClass ||
596  RegClass == &AMDGPU::SGPR_64RegClass ||
597  RegClass == &AMDGPU::SReg_64_XEXECRegClass) {
598  BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
599  .addImm(Value);
600  return;
601  }
602 
603  if (RegClass == &AMDGPU::VGPR_32RegClass) {
604  BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
605  .addImm(Value);
606  return;
607  }
608  if (RegClass == &AMDGPU::VReg_64RegClass) {
609  BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), DestReg)
610  .addImm(Value);
611  return;
612  }
613 
614  unsigned EltSize = 4;
615  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
616  if (RI.isSGPRClass(RegClass)) {
617  if (RI.getRegSizeInBits(*RegClass) > 32) {
618  Opcode = AMDGPU::S_MOV_B64;
619  EltSize = 8;
620  } else {
621  Opcode = AMDGPU::S_MOV_B32;
622  EltSize = 4;
623  }
624  }
625 
626  ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RegClass, EltSize);
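 // Only the first sub-register receives the immediate; the remaining
 // sub-registers are zeroed.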
627  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
628  int64_t IdxValue = Idx == 0 ? Value : 0;
629 
630  MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
631  get(Opcode), RI.getSubReg(DestReg, Idx));
632  Builder.addImm(IdxValue);
633  }
634 }
635 
636 const TargetRegisterClass *
637 SIInstrInfo::getPreferredSelectRegClass(unsigned Size) const {
638  return &AMDGPU::VGPR_32RegClass;
639 }
640 
641 void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
642  MachineBasicBlock::iterator I,
643  const DebugLoc &DL, unsigned DstReg,
644  ArrayRef<MachineOperand> Cond,
645  unsigned TrueReg,
646  unsigned FalseReg) const {
647  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
648  assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
649  "Not a VGPR32 reg");
650 
651  if (Cond.size() == 1) {
652  unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
653  BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
654  .add(Cond[0]);
655  BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
656  .addReg(FalseReg)
657  .addReg(TrueReg)
658  .addReg(SReg);
659  } else if (Cond.size() == 2) {
660  assert(Cond[0].isImm() && "Cond[0] is not an immediate");
661  switch (Cond[0].getImm()) {
662  case SIInstrInfo::SCC_TRUE: {
663  unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
664  BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
665  .addImm(-1)
666  .addImm(0);
667  BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
668  .addReg(FalseReg)
669  .addReg(TrueReg)
670  .addReg(SReg);
671  break;
672  }
673  case SIInstrInfo::SCC_FALSE: {
674  unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
675  BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
676  .addImm(0)
677  .addImm(-1);
678  BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
679  .addReg(FalseReg)
680  .addReg(TrueReg)
681  .addReg(SReg);
682  break;
683  }
684  case SIInstrInfo::VCCNZ: {
685  MachineOperand RegOp = Cond[1];
686  RegOp.setImplicit(false);
687  unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
688  BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
689  .add(RegOp);
690  BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
691  .addReg(FalseReg)
692  .addReg(TrueReg)
693  .addReg(SReg);
694  break;
695  }
696  case SIInstrInfo::VCCZ: {
697  MachineOperand RegOp = Cond[1];
698  RegOp.setImplicit(false);
699  unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
700  BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
701  .add(RegOp);
702  BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
703  .addReg(TrueReg)
704  .addReg(FalseReg)
705  .addReg(SReg);
706  break;
707  }
708  case SIInstrInfo::EXECNZ: {
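 // s_or_saveexec with 0 leaves EXEC unchanged but sets SCC to whether EXEC is
 // non-zero; the following s_cselect uses that to build the condition mask.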
709  unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
710  unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
711  BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
712  .addImm(0);
713  BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
714  .addImm(-1)
715  .addImm(0);
716  BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
717  .addReg(FalseReg)
718  .addReg(TrueReg)
719  .addReg(SReg);
720  break;
721  }
722  case SIInstrInfo::EXECZ: {
723  unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
724  unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
725  BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
726  .addImm(0);
727  BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
728  .addImm(0)
729  .addImm(-1);
730  BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
731  .addReg(FalseReg)
732  .addReg(TrueReg)
733  .addReg(SReg);
734  llvm_unreachable("Unhandled branch predicate EXECZ");
735  break;
736  }
737  default:
738  llvm_unreachable("invalid branch predicate");
739  }
740  } else {
741  llvm_unreachable("Can only handle Cond size 1 or 2");
742  }
743 }
744 
745 unsigned SIInstrInfo::insertEQ(MachineBasicBlock *MBB,
746  MachineBasicBlock::iterator I,
747  const DebugLoc &DL,
748  unsigned SrcReg, int Value) const {
749  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
750  unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
751  BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_EQ_I32_e64), Reg)
752  .addImm(Value)
753  .addReg(SrcReg);
754 
755  return Reg;
756 }
757 
758 unsigned SIInstrInfo::insertNE(MachineBasicBlock *MBB,
759  MachineBasicBlock::iterator I,
760  const DebugLoc &DL,
761  unsigned SrcReg, int Value) const {
762  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
763  unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
764  BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_NE_I32_e64), Reg)
765  .addImm(Value)
766  .addReg(SrcReg);
767 
768  return Reg;
769 }
770 
771 unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
772 
773  if (RI.getRegSizeInBits(*DstRC) == 32) {
774  return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
775  } else if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC)) {
776  return AMDGPU::S_MOV_B64;
777  } else if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC)) {
778  return AMDGPU::V_MOV_B64_PSEUDO;
779  }
780  return AMDGPU::COPY;
781 }
782 
783 static unsigned getSGPRSpillSaveOpcode(unsigned Size) {
784  switch (Size) {
785  case 4:
786  return AMDGPU::SI_SPILL_S32_SAVE;
787  case 8:
788  return AMDGPU::SI_SPILL_S64_SAVE;
789  case 16:
790  return AMDGPU::SI_SPILL_S128_SAVE;
791  case 32:
792  return AMDGPU::SI_SPILL_S256_SAVE;
793  case 64:
794  return AMDGPU::SI_SPILL_S512_SAVE;
795  default:
796  llvm_unreachable("unknown register size");
797  }
798 }
799 
800 static unsigned getVGPRSpillSaveOpcode(unsigned Size) {
801  switch (Size) {
802  case 4:
803  return AMDGPU::SI_SPILL_V32_SAVE;
804  case 8:
805  return AMDGPU::SI_SPILL_V64_SAVE;
806  case 12:
807  return AMDGPU::SI_SPILL_V96_SAVE;
808  case 16:
809  return AMDGPU::SI_SPILL_V128_SAVE;
810  case 32:
811  return AMDGPU::SI_SPILL_V256_SAVE;
812  case 64:
813  return AMDGPU::SI_SPILL_V512_SAVE;
814  default:
815  llvm_unreachable("unknown register size");
816  }
817 }
818 
819 void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
820  MachineBasicBlock::iterator MI,
821  unsigned SrcReg, bool isKill,
822  int FrameIndex,
823  const TargetRegisterClass *RC,
824  const TargetRegisterInfo *TRI) const {
825  MachineFunction *MF = MBB.getParent();
826  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
827  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
828  DebugLoc DL = MBB.findDebugLoc(MI);
829 
830  assert(SrcReg != MFI->getStackPtrOffsetReg() &&
831  SrcReg != MFI->getFrameOffsetReg() &&
832  SrcReg != MFI->getScratchWaveOffsetReg());
833 
834  unsigned Size = FrameInfo.getObjectSize(FrameIndex);
835  unsigned Align = FrameInfo.getObjectAlignment(FrameIndex);
836  MachinePointerInfo PtrInfo
837  = MachinePointerInfo::getFixedStack(*MF, FrameIndex);
838  MachineMemOperand *MMO
839  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
840  Size, Align);
841  unsigned SpillSize = TRI->getSpillSize(*RC);
842 
843  if (RI.isSGPRClass(RC)) {
844  MFI->setHasSpilledSGPRs();
845 
846  // We are only allowed to create one new instruction when spilling
847  // registers, so we need to use a pseudo instruction for spilling SGPRs.
848  const MCInstrDesc &OpDesc = get(getSGPRSpillSaveOpcode(SpillSize));
849 
850  // The SGPR spill/restore instructions only work on numbered SGPRs, so we
851  // need to make sure we are using the correct register class.
852  if (TargetRegisterInfo::isVirtualRegister(SrcReg) && SpillSize == 4) {
853  MachineRegisterInfo &MRI = MF->getRegInfo();
854  MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass);
855  }
856 
857  MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc)
858  .addReg(SrcReg, getKillRegState(isKill)) // data
859  .addFrameIndex(FrameIndex) // addr
860  .addMemOperand(MMO)
862  .addReg(MFI->getFrameOffsetReg(), RegState::Implicit);
863  // Add the scratch resource registers as implicit uses because we may end up
864  // needing them, and need to ensure that the reserved registers are
865  // correctly handled.
866 
867  FrameInfo.setStackID(FrameIndex, 1);
868  if (ST.hasScalarStores()) {
869  // m0 is used for offset to scalar stores if used to spill.
870  Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead);
871  }
872 
873  return;
874  }
875 
876  if (!ST.isVGPRSpillingEnabled(MF->getFunction())) {
877  LLVMContext &Ctx = MF->getFunction().getContext();
878  Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to"
879  " spill register");
880  BuildMI(MBB, MI, DL, get(AMDGPU::KILL))
881  .addReg(SrcReg);
882 
883  return;
884  }
885 
886  assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");
887 
888  unsigned Opcode = getVGPRSpillSaveOpcode(SpillSize);
889  MFI->setHasSpilledVGPRs();
890  BuildMI(MBB, MI, DL, get(Opcode))
891  .addReg(SrcReg, getKillRegState(isKill)) // data
892  .addFrameIndex(FrameIndex) // addr
893  .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
894  .addReg(MFI->getFrameOffsetReg()) // scratch_offset
895  .addImm(0) // offset
896  .addMemOperand(MMO);
897 }
898 
899 static unsigned getSGPRSpillRestoreOpcode(unsigned Size) {
900  switch (Size) {
901  case 4:
902  return AMDGPU::SI_SPILL_S32_RESTORE;
903  case 8:
904  return AMDGPU::SI_SPILL_S64_RESTORE;
905  case 16:
906  return AMDGPU::SI_SPILL_S128_RESTORE;
907  case 32:
908  return AMDGPU::SI_SPILL_S256_RESTORE;
909  case 64:
910  return AMDGPU::SI_SPILL_S512_RESTORE;
911  default:
912  llvm_unreachable("unknown register size");
913  }
914 }
915 
916 static unsigned getVGPRSpillRestoreOpcode(unsigned Size) {
917  switch (Size) {
918  case 4:
919  return AMDGPU::SI_SPILL_V32_RESTORE;
920  case 8:
921  return AMDGPU::SI_SPILL_V64_RESTORE;
922  case 12:
923  return AMDGPU::SI_SPILL_V96_RESTORE;
924  case 16:
925  return AMDGPU::SI_SPILL_V128_RESTORE;
926  case 32:
927  return AMDGPU::SI_SPILL_V256_RESTORE;
928  case 64:
929  return AMDGPU::SI_SPILL_V512_RESTORE;
930  default:
931  llvm_unreachable("unknown register size");
932  }
933 }
934 
935 void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
936  MachineBasicBlock::iterator MI,
937  unsigned DestReg, int FrameIndex,
938  const TargetRegisterClass *RC,
939  const TargetRegisterInfo *TRI) const {
940  MachineFunction *MF = MBB.getParent();
941  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
942  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
943  DebugLoc DL = MBB.findDebugLoc(MI);
944  unsigned Align = FrameInfo.getObjectAlignment(FrameIndex);
945  unsigned Size = FrameInfo.getObjectSize(FrameIndex);
946  unsigned SpillSize = TRI->getSpillSize(*RC);
947 
948  MachinePointerInfo PtrInfo
949  = MachinePointerInfo::getFixedStack(*MF, FrameIndex);
950 
951  MachineMemOperand *MMO = MF->getMachineMemOperand(
952  PtrInfo, MachineMemOperand::MOLoad, Size, Align);
953 
954  if (RI.isSGPRClass(RC)) {
955  // FIXME: Maybe this should not include a memoperand because it will be
956  // lowered to non-memory instructions.
957  const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(SpillSize));
958  if (TargetRegisterInfo::isVirtualRegister(DestReg) && SpillSize == 4) {
959  MachineRegisterInfo &MRI = MF->getRegInfo();
960  MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass);
961  }
962 
963  FrameInfo.setStackID(FrameIndex, 1);
964  MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc, DestReg)
965  .addFrameIndex(FrameIndex) // addr
966  .addMemOperand(MMO)
968  .addReg(MFI->getFrameOffsetReg(), RegState::Implicit);
969 
970  if (ST.hasScalarStores()) {
971  // m0 is used for offset to scalar stores if used to spill.
972  Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead);
973  }
974 
975  return;
976  }
977 
978  if (!ST.isVGPRSpillingEnabled(MF->getFunction())) {
979  LLVMContext &Ctx = MF->getFunction().getContext();
980  Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
981  " restore register");
982  BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg);
983 
984  return;
985  }
986 
987  assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");
988 
989  unsigned Opcode = getVGPRSpillRestoreOpcode(SpillSize);
990  BuildMI(MBB, MI, DL, get(Opcode), DestReg)
991  .addFrameIndex(FrameIndex) // vaddr
992  .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
993  .addReg(MFI->getFrameOffsetReg()) // scratch_offset
994  .addImm(0) // offset
995  .addMemOperand(MMO);
996 }
997 
998 /// \param @Offset Offset in bytes of the FrameIndex being spilled
999 unsigned SIInstrInfo::calculateLDSSpillAddress(
1000  MachineBasicBlock &MBB, MachineInstr &MI, RegScavenger *RS, unsigned TmpReg,
1001  unsigned FrameOffset, unsigned Size) const {
1002  MachineFunction *MF = MBB.getParent();
1003  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
1004  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
1005  DebugLoc DL = MBB.findDebugLoc(MI);
1006  unsigned WorkGroupSize = MFI->getMaxFlatWorkGroupSize();
1007  unsigned WavefrontSize = ST.getWavefrontSize();
1008 
1009  unsigned TIDReg = MFI->getTIDReg();
1010  if (!MFI->hasCalculatedTID()) {
1011  MachineBasicBlock &Entry = MBB.getParent()->front();
1012  MachineBasicBlock::iterator Insert = Entry.front();
1013  DebugLoc DL = Insert->getDebugLoc();
1014 
1015  TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass,
1016  *MF);
1017  if (TIDReg == AMDGPU::NoRegister)
1018  return TIDReg;
1019 
1021  WorkGroupSize > WavefrontSize) {
1022  unsigned TIDIGXReg
1024  unsigned TIDIGYReg
1026  unsigned TIDIGZReg
1028  unsigned InputPtrReg =
1030  for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
1031  if (!Entry.isLiveIn(Reg))
1032  Entry.addLiveIn(Reg);
1033  }
1034 
1035  RS->enterBasicBlock(Entry);
1036  // FIXME: Can we scavenge an SReg_64 and access the subregs?
1037  unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
1038  unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
1039  BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0)
1040  .addReg(InputPtrReg)
1042  BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1)
1043  .addReg(InputPtrReg)
1045 
1046  // NGROUPS.X * NGROUPS.Y
1047  BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1)
1048  .addReg(STmp1)
1049  .addReg(STmp0);
1050  // (NGROUPS.X * NGROUPS.Y) * TIDIG.X
1051  BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg)
1052  .addReg(STmp1)
1053  .addReg(TIDIGXReg);
1054  // NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)
1055  BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg)
1056  .addReg(STmp0)
1057  .addReg(TIDIGYReg)
1058  .addReg(TIDReg);
1059  // (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)) + TIDIG.Z
1060  getAddNoCarry(Entry, Insert, DL, TIDReg)
1061  .addReg(TIDReg)
1062  .addReg(TIDIGZReg);
1063  } else {
1064  // Get the wave id
1065  BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64),
1066  TIDReg)
1067  .addImm(-1)
1068  .addImm(0);
1069 
1070  BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64),
1071  TIDReg)
1072  .addImm(-1)
1073  .addReg(TIDReg);
1074  }
1075 
1076  BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32),
1077  TIDReg)
1078  .addImm(2)
1079  .addReg(TIDReg);
1080  MFI->setTIDReg(TIDReg);
1081  }
1082 
1083  // Add FrameIndex to LDS offset
1084  unsigned LDSOffset = MFI->getLDSSize() + (FrameOffset * WorkGroupSize);
1085  getAddNoCarry(MBB, MI, DL, TmpReg)
1086  .addImm(LDSOffset)
1087  .addReg(TIDReg);
1088 
1089  return TmpReg;
1090 }
1091 
1092 void SIInstrInfo::insertWaitStates(MachineBasicBlock &MBB,
1093  MachineBasicBlock::iterator MI,
1094  int Count) const {
1095  DebugLoc DL = MBB.findDebugLoc(MI);
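 // s_nop N inserts N+1 wait states (N is at most 7), so emit s_nop 7 chunks
 // until fewer than eight wait states remain.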
1096  while (Count > 0) {
1097  int Arg;
1098  if (Count >= 8)
1099  Arg = 7;
1100  else
1101  Arg = Count - 1;
1102  Count -= 8;
1103  BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP))
1104  .addImm(Arg);
1105  }
1106 }
1107 
1108 void SIInstrInfo::insertNoop(MachineBasicBlock &MBB,
1109  MachineBasicBlock::iterator MI) const {
1110  insertWaitStates(MBB, MI, 1);
1111 }
1112 
1113 void SIInstrInfo::insertReturn(MachineBasicBlock &MBB) const {
1114  auto MF = MBB.getParent();
1115  SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
1116 
1117  assert(Info->isEntryFunction());
1118 
1119  if (MBB.succ_empty()) {
1120  bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end();
1121  if (HasNoTerminator)
1122  BuildMI(MBB, MBB.end(), DebugLoc(),
1123  get(Info->returnsVoid() ? AMDGPU::S_ENDPGM : AMDGPU::SI_RETURN_TO_EPILOG));
1124  }
1125 }
1126 
1128  switch (MI.getOpcode()) {
1129  default: return 1; // FIXME: Do wait states equal cycles?
1130 
1131  case AMDGPU::S_NOP:
1132  return MI.getOperand(0).getImm() + 1;
1133  }
1134 }
1135 
1136 bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1137  MachineBasicBlock &MBB = *MI.getParent();
1138  DebugLoc DL = MBB.findDebugLoc(MI);
1139  switch (MI.getOpcode()) {
1140  default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
1141  case AMDGPU::S_MOV_B64_term:
1142  // This is only a terminator to get the correct spill code placement during
1143  // register allocation.
1144  MI.setDesc(get(AMDGPU::S_MOV_B64));
1145  break;
1146 
1147  case AMDGPU::S_XOR_B64_term:
1148  // This is only a terminator to get the correct spill code placement during
1149  // register allocation.
1150  MI.setDesc(get(AMDGPU::S_XOR_B64));
1151  break;
1152 
1153  case AMDGPU::S_ANDN2_B64_term:
1154  // This is only a terminator to get the correct spill code placement during
1155  // register allocation.
1156  MI.setDesc(get(AMDGPU::S_ANDN2_B64));
1157  break;
1158 
1159  case AMDGPU::V_MOV_B64_PSEUDO: {
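 // Split the 64-bit pseudo move into two 32-bit moves of the low and high
 // halves, marking the full register as implicitly defined on each half.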
1160  unsigned Dst = MI.getOperand(0).getReg();
1161  unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
1162  unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
1163 
1164  const MachineOperand &SrcOp = MI.getOperand(1);
1165  // FIXME: Will this work for 64-bit floating point immediates?
1166  assert(!SrcOp.isFPImm());
1167  if (SrcOp.isImm()) {
1168  APInt Imm(64, SrcOp.getImm());
1169  BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
1170  .addImm(Imm.getLoBits(32).getZExtValue())
1171  .addReg(Dst, RegState::Implicit | RegState::Define);
1172  BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
1173  .addImm(Imm.getHiBits(32).getZExtValue())
1174  .addReg(Dst, RegState::Implicit | RegState::Define);
1175  } else {
1176  assert(SrcOp.isReg());
1177  BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
1178  .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0))
1180  BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
1181  .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1))
1183  }
1184  MI.eraseFromParent();
1185  break;
1186  }
1187  case AMDGPU::V_SET_INACTIVE_B32: {
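 // Invert EXEC so only the previously inactive lanes execute the move, write
 // the value, then invert EXEC again to restore the original mask.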
1188  BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
1189  .addReg(AMDGPU::EXEC);
1190  BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), MI.getOperand(0).getReg())
1191  .add(MI.getOperand(2));
1192  BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
1193  .addReg(AMDGPU::EXEC);
1194  MI.eraseFromParent();
1195  break;
1196  }
1197  case AMDGPU::V_SET_INACTIVE_B64: {
1198  BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
1199  .addReg(AMDGPU::EXEC);
1200  MachineInstr *Copy = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO),
1201  MI.getOperand(0).getReg())
1202  .add(MI.getOperand(2));
1203  expandPostRAPseudo(*Copy);
1204  BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
1205  .addReg(AMDGPU::EXEC);
1206  MI.eraseFromParent();
1207  break;
1208  }
1209  case AMDGPU::V_MOVRELD_B32_V1:
1210  case AMDGPU::V_MOVRELD_B32_V2:
1211  case AMDGPU::V_MOVRELD_B32_V4:
1212  case AMDGPU::V_MOVRELD_B32_V8:
1213  case AMDGPU::V_MOVRELD_B32_V16: {
1214  const MCInstrDesc &MovRelDesc = get(AMDGPU::V_MOVRELD_B32_e32);
1215  unsigned VecReg = MI.getOperand(0).getReg();
1216  bool IsUndef = MI.getOperand(1).isUndef();
1217  unsigned SubReg = AMDGPU::sub0 + MI.getOperand(3).getImm();
1218  assert(VecReg == MI.getOperand(1).getReg());
1219 
1220  MachineInstr *MovRel =
1221  BuildMI(MBB, MI, DL, MovRelDesc)
1222  .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef)
1223  .add(MI.getOperand(2))
1224  .addReg(VecReg, RegState::ImplicitDefine)
1225  .addReg(VecReg,
1226  RegState::Implicit | (IsUndef ? RegState::Undef : 0));
1227 
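 // Tie the implicit def of the whole vector register to its implicit use to
 // model the read-modify-write of the full register.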
1228  const int ImpDefIdx =
1229  MovRelDesc.getNumOperands() + MovRelDesc.getNumImplicitUses();
1230  const int ImpUseIdx = ImpDefIdx + 1;
1231  MovRel->tieOperands(ImpDefIdx, ImpUseIdx);
1232 
1233  MI.eraseFromParent();
1234  break;
1235  }
1236  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
1237  MachineFunction &MF = *MBB.getParent();
1238  unsigned Reg = MI.getOperand(0).getReg();
1239  unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
1240  unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
1241 
1242  // Create a bundle so these instructions won't be re-ordered by the
1243  // post-RA scheduler.
1244  MIBundleBuilder Bundler(MBB, MI);
1245  Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg));
1246 
1247  // Add 32-bit offset from this instruction to the start of the
1248  // constant data.
1249  Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo)
1250  .addReg(RegLo)
1251  .add(MI.getOperand(1)));
1252 
1253  MachineInstrBuilder MIB = BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi)
1254  .addReg(RegHi);
1256  MIB.addImm(0);
1257  else
1258  MIB.add(MI.getOperand(2));
1259 
1260  Bundler.append(MIB);
1261  finalizeBundle(MBB, Bundler.begin());
1262 
1263  MI.eraseFromParent();
1264  break;
1265  }
1266  case AMDGPU::EXIT_WWM: {
1267  // This only gets its own opcode so that SIFixWWMLiveness can tell when WWM
1268  // is exited.
1269  MI.setDesc(get(AMDGPU::S_MOV_B64));
1270  break;
1271  }
1272  }
1273  return true;
1274 }
1275 
1276 bool SIInstrInfo::swapSourceModifiers(MachineInstr &MI,
1277  MachineOperand &Src0,
1278  unsigned Src0OpName,
1279  MachineOperand &Src1,
1280  unsigned Src1OpName) const {
1281  MachineOperand *Src0Mods = getNamedOperand(MI, Src0OpName);
1282  if (!Src0Mods)
1283  return false;
1284 
1285  MachineOperand *Src1Mods = getNamedOperand(MI, Src1OpName);
1286  assert(Src1Mods &&
1287  "All commutable instructions have both src0 and src1 modifiers");
1288 
1289  int Src0ModsVal = Src0Mods->getImm();
1290  int Src1ModsVal = Src1Mods->getImm();
1291 
1292  Src1Mods->setImm(Src0ModsVal);
1293  Src0Mods->setImm(Src1ModsVal);
1294  return true;
1295 }
1296 
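// Swap a register operand with an immediate or frame-index operand, carrying
// over the register flags; returns nullptr for operand kinds it cannot handle.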
1297 static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI,
1298  MachineOperand &RegOp,
1299  MachineOperand &NonRegOp) {
1300  unsigned Reg = RegOp.getReg();
1301  unsigned SubReg = RegOp.getSubReg();
1302  bool IsKill = RegOp.isKill();
1303  bool IsDead = RegOp.isDead();
1304  bool IsUndef = RegOp.isUndef();
1305  bool IsDebug = RegOp.isDebug();
1306 
1307  if (NonRegOp.isImm())
1308  RegOp.ChangeToImmediate(NonRegOp.getImm());
1309  else if (NonRegOp.isFI())
1310  RegOp.ChangeToFrameIndex(NonRegOp.getIndex());
1311  else
1312  return nullptr;
1313 
1314  NonRegOp.ChangeToRegister(Reg, false, false, IsKill, IsDead, IsUndef, IsDebug);
1315  NonRegOp.setSubReg(SubReg);
1316 
1317  return &MI;
1318 }
1319 
1320 MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
1321  unsigned Src0Idx,
1322  unsigned Src1Idx) const {
1323  assert(!NewMI && "this should never be used");
1324 
1325  unsigned Opc = MI.getOpcode();
1326  int CommutedOpcode = commuteOpcode(Opc);
1327  if (CommutedOpcode == -1)
1328  return nullptr;
1329 
1330  assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
1331  static_cast<int>(Src0Idx) &&
1332  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
1333  static_cast<int>(Src1Idx) &&
1334  "inconsistency with findCommutedOpIndices");
1335 
1336  MachineOperand &Src0 = MI.getOperand(Src0Idx);
1337  MachineOperand &Src1 = MI.getOperand(Src1Idx);
1338 
1339  MachineInstr *CommutedMI = nullptr;
1340  if (Src0.isReg() && Src1.isReg()) {
1341  if (isOperandLegal(MI, Src1Idx, &Src0)) {
1342  // Be sure to copy the source modifiers to the right place.
1343  CommutedMI
1344  = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, Src0Idx, Src1Idx);
1345  }
1346 
1347  } else if (Src0.isReg() && !Src1.isReg()) {
1348  // src0 should always be able to support any operand type, so no need to
1349  // check operand legality.
1350  CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1);
1351  } else if (!Src0.isReg() && Src1.isReg()) {
1352  if (isOperandLegal(MI, Src1Idx, &Src0))
1353  CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0);
1354  } else {
1355  // FIXME: Found two non-register operands to commute. This does happen.
1356  return nullptr;
1357  }
1358 
1359  if (CommutedMI) {
1360  swapSourceModifiers(MI, Src0, AMDGPU::OpName::src0_modifiers,
1361  Src1, AMDGPU::OpName::src1_modifiers);
1362 
1363  CommutedMI->setDesc(get(CommutedOpcode));
1364  }
1365 
1366  return CommutedMI;
1367 }
1368 
1369 // This needs to be implemented because the source modifiers may be inserted
1370 // between the true commutable operands, and the base
1371 // TargetInstrInfo::commuteInstruction uses it.
1372 bool SIInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx0,
1373  unsigned &SrcOpIdx1) const {
1374  if (!MI.isCommutable())
1375  return false;
1376 
1377  unsigned Opc = MI.getOpcode();
1378  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
1379  if (Src0Idx == -1)
1380  return false;
1381 
1382  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
1383  if (Src1Idx == -1)
1384  return false;
1385 
1386  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
1387 }
1388 
1389 bool SIInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
1390  int64_t BrOffset) const {
1391  // BranchRelaxation should never have to check s_setpc_b64 because its dest
1392  // block is unanalyzable.
1393  assert(BranchOp != AMDGPU::S_SETPC_B64);
1394 
1395  // Convert to dwords.
1396  BrOffset /= 4;
1397 
1398  // The branch instructions do PC += signext(SIMM16 * 4) + 4, so the offset is
1399  // from the next instruction.
1400  BrOffset -= 1;
1401 
1402  return isIntN(BranchOffsetBits, BrOffset);
1403 }
1404 
1405 MachineBasicBlock *SIInstrInfo::getBranchDestBlock(
1406  const MachineInstr &MI) const {
1407  if (MI.getOpcode() == AMDGPU::S_SETPC_B64) {
1408  // This would be a difficult analysis to perform, but it is always legal, so
1409  // there's no need to analyze it.
1410  return nullptr;
1411  }
1412 
1413  return MI.getOperand(0).getMBB();
1414 }
1415 
1416 unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
1417  MachineBasicBlock &DestBB,
1418  const DebugLoc &DL,
1419  int64_t BrOffset,
1420  RegScavenger *RS) const {
1421  assert(RS && "RegScavenger required for long branching");
1422  assert(MBB.empty() &&
1423  "new block should be inserted for expanding unconditional branch");
1424  assert(MBB.pred_size() == 1);
1425 
1426  MachineFunction *MF = MBB.getParent();
1427  MachineRegisterInfo &MRI = MF->getRegInfo();
1428 
1429  // FIXME: Virtual register workaround for RegScavenger not working with empty
1430  // blocks.
1431  unsigned PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
1432 
1433  auto I = MBB.end();
1434 
1435  // We need to compute the offset relative to the instruction immediately after
1436  // s_getpc_b64. Insert pc arithmetic code before last terminator.
1437  MachineInstr *GetPC = BuildMI(MBB, I, DL, get(AMDGPU::S_GETPC_B64), PCReg);
1438 
1439  // TODO: Handle > 32-bit block address.
1440  if (BrOffset >= 0) {
1441  BuildMI(MBB, I, DL, get(AMDGPU::S_ADD_U32))
1442  .addReg(PCReg, RegState::Define, AMDGPU::sub0)
1443  .addReg(PCReg, 0, AMDGPU::sub0)
1445  BuildMI(MBB, I, DL, get(AMDGPU::S_ADDC_U32))
1446  .addReg(PCReg, RegState::Define, AMDGPU::sub1)
1447  .addReg(PCReg, 0, AMDGPU::sub1)
1448  .addImm(0);
1449  } else {
1450  // Backwards branch.
1451  BuildMI(MBB, I, DL, get(AMDGPU::S_SUB_U32))
1452  .addReg(PCReg, RegState::Define, AMDGPU::sub0)
1453  .addReg(PCReg, 0, AMDGPU::sub0)
1455  BuildMI(MBB, I, DL, get(AMDGPU::S_SUBB_U32))
1456  .addReg(PCReg, RegState::Define, AMDGPU::sub1)
1457  .addReg(PCReg, 0, AMDGPU::sub1)
1458  .addImm(0);
1459  }
1460 
1461  // Insert the indirect branch after the other terminator.
1462  BuildMI(&MBB, DL, get(AMDGPU::S_SETPC_B64))
1463  .addReg(PCReg);
1464 
1465  // FIXME: If spilling is necessary, this will fail because this scavenger has
1466  // no emergency stack slots. It is non-trivial to spill in this situation,
1467  // because the restore code needs to be specially placed after the
1468  // jump. BranchRelaxation then needs to be made aware of the newly inserted
1469  // block.
1470  //
1471  // If a spill is needed for the pc register pair, we need to insert a spill
1472  // restore block right before the destination block, and insert a short branch
1473  // into the old destination block's fallthrough predecessor.
1474  // e.g.:
1475  //
1476  // s_cbranch_scc0 skip_long_branch:
1477  //
1478  // long_branch_bb:
1479  // spill s[8:9]
1480  // s_getpc_b64 s[8:9]
1481  // s_add_u32 s8, s8, restore_bb
1482  // s_addc_u32 s9, s9, 0
1483  // s_setpc_b64 s[8:9]
1484  //
1485  // skip_long_branch:
1486  // foo;
1487  //
1488  // .....
1489  //
1490  // dest_bb_fallthrough_predecessor:
1491  // bar;
1492  // s_branch dest_bb
1493  //
1494  // restore_bb:
1495  // restore s[8:9]
1496  // fallthrough dest_bb
1497  //
1498  // dest_bb:
1499  // buzz;
1500 
1501  RS->enterBasicBlockEnd(MBB);
1502  unsigned Scav = RS->scavengeRegister(&AMDGPU::SReg_64RegClass,
1503  MachineBasicBlock::iterator(GetPC), 0);
1504  MRI.replaceRegWith(PCReg, Scav);
1505  MRI.clearVirtRegs();
1506  RS->setRegUsed(Scav);
1507 
1508  return 4 + 8 + 4 + 4;
1509 }
1510 
1511 unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
1512  switch (Cond) {
1513  case SIInstrInfo::SCC_TRUE:
1514  return AMDGPU::S_CBRANCH_SCC1;
1515  case SIInstrInfo::SCC_FALSE:
1516  return AMDGPU::S_CBRANCH_SCC0;
1517  case SIInstrInfo::VCCNZ:
1518  return AMDGPU::S_CBRANCH_VCCNZ;
1519  case SIInstrInfo::VCCZ:
1520  return AMDGPU::S_CBRANCH_VCCZ;
1521  case SIInstrInfo::EXECNZ:
1522  return AMDGPU::S_CBRANCH_EXECNZ;
1523  case SIInstrInfo::EXECZ:
1524  return AMDGPU::S_CBRANCH_EXECZ;
1525  default:
1526  llvm_unreachable("invalid branch predicate");
1527  }
1528 }
1529 
1530 SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
1531  switch (Opcode) {
1532  case AMDGPU::S_CBRANCH_SCC0:
1533  return SCC_FALSE;
1534  case AMDGPU::S_CBRANCH_SCC1:
1535  return SCC_TRUE;
1536  case AMDGPU::S_CBRANCH_VCCNZ:
1537  return VCCNZ;
1538  case AMDGPU::S_CBRANCH_VCCZ:
1539  return VCCZ;
1540  case AMDGPU::S_CBRANCH_EXECNZ:
1541  return EXECNZ;
1542  case AMDGPU::S_CBRANCH_EXECZ:
1543  return EXECZ;
1544  default:
1545  return INVALID_BR;
1546  }
1547 }
1548 
1549 bool SIInstrInfo::analyzeBranchImpl(MachineBasicBlock &MBB,
1550  MachineBasicBlock::iterator I,
1551  MachineBasicBlock *&TBB,
1552  MachineBasicBlock *&FBB,
1553  SmallVectorImpl<MachineOperand> &Cond,
1554  bool AllowModify) const {
1555  if (I->getOpcode() == AMDGPU::S_BRANCH) {
1556  // Unconditional Branch
1557  TBB = I->getOperand(0).getMBB();
1558  return false;
1559  }
1560 
1561  MachineBasicBlock *CondBB = nullptr;
1562 
1563  if (I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
1564  CondBB = I->getOperand(1).getMBB();
1565  Cond.push_back(I->getOperand(0));
1566  } else {
1567  BranchPredicate Pred = getBranchPredicate(I->getOpcode());
1568  if (Pred == INVALID_BR)
1569  return true;
1570 
1571  CondBB = I->getOperand(0).getMBB();
1572  Cond.push_back(MachineOperand::CreateImm(Pred));
1573  Cond.push_back(I->getOperand(1)); // Save the branch register.
1574  }
1575  ++I;
1576 
1577  if (I == MBB.end()) {
1578  // Conditional branch followed by fall-through.
1579  TBB = CondBB;
1580  return false;
1581  }
1582 
1583  if (I->getOpcode() == AMDGPU::S_BRANCH) {
1584  TBB = CondBB;
1585  FBB = I->getOperand(0).getMBB();
1586  return false;
1587  }
1588 
1589  return true;
1590 }
1591 
1592 bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
1593  MachineBasicBlock *&FBB,
1594  SmallVectorImpl<MachineOperand> &Cond,
1595  bool AllowModify) const {
1596  MachineBasicBlock::iterator I = MBB.getFirstTerminator();
1597  if (I == MBB.end())
1598  return false;
1599 
1600  if (I->getOpcode() != AMDGPU::SI_MASK_BRANCH)
1601  return analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify);
1602 
1603  ++I;
1604 
1605  // TODO: Should be able to treat as fallthrough?
1606  if (I == MBB.end())
1607  return true;
1608 
1609  if (analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify))
1610  return true;
1611 
1612  MachineBasicBlock *MaskBrDest = I->getOperand(0).getMBB();
1613 
1614  // Specifically handle the case where the conditional branch is to the same
1615  // destination as the mask branch. e.g.
1616  //
1617  // si_mask_branch BB8
1618  // s_cbranch_execz BB8
1619  // s_cbranch BB9
1620  //
1621  // This is required to understand divergent loops which may need the branches
1622  // to be relaxed.
1623  if (TBB != MaskBrDest || Cond.empty())
1624  return true;
1625 
1626  auto Pred = Cond[0].getImm();
1627  return (Pred != EXECZ && Pred != EXECNZ);
1628 }
1629 
1630 unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB,
1631  int *BytesRemoved) const {
1632  MachineBasicBlock::iterator I = MBB.getFirstTerminator();
1633 
1634  unsigned Count = 0;
1635  unsigned RemovedSize = 0;
1636  while (I != MBB.end()) {
1637  MachineBasicBlock::iterator Next = std::next(I);
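 // SI_MASK_BRANCH is only a pseudo marker; leave it in place and remove just
 // the real branch instructions.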
1638  if (I->getOpcode() == AMDGPU::SI_MASK_BRANCH) {
1639  I = Next;
1640  continue;
1641  }
1642 
1643  RemovedSize += getInstSizeInBytes(*I);
1644  I->eraseFromParent();
1645  ++Count;
1646  I = Next;
1647  }
1648 
1649  if (BytesRemoved)
1650  *BytesRemoved = RemovedSize;
1651 
1652  return Count;
1653 }
1654 
1655 // Copy the flags onto the implicit condition register operand.
1656 static void preserveCondRegFlags(MachineOperand &CondReg,
1657  const MachineOperand &OrigCond) {
1658  CondReg.setIsUndef(OrigCond.isUndef());
1659  CondReg.setIsKill(OrigCond.isKill());
1660 }
1661 
1662 unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB,
1663  MachineBasicBlock *TBB,
1664  MachineBasicBlock *FBB,
1665  ArrayRef<MachineOperand> Cond,
1666  const DebugLoc &DL,
1667  int *BytesAdded) const {
1668  if (!FBB && Cond.empty()) {
1669  BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
1670  .addMBB(TBB);
1671  if (BytesAdded)
1672  *BytesAdded = 4;
1673  return 1;
1674  }
1675 
1676  if (Cond.size() == 1 && Cond[0].isReg()) {
1677  BuildMI(&MBB, DL, get(AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO))
1678  .add(Cond[0])
1679  .addMBB(TBB);
1680  return 1;
1681  }
1682 
1683  assert(TBB && Cond[0].isImm());
1684 
1685  unsigned Opcode
1686  = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));
1687 
1688  if (!FBB) {
1689  Cond[1].isUndef();
1690  MachineInstr *CondBr =
1691  BuildMI(&MBB, DL, get(Opcode))
1692  .addMBB(TBB);
1693 
1694  // Copy the flags onto the implicit condition register operand.
1695  preserveCondRegFlags(CondBr->getOperand(1), Cond[1]);
1696 
1697  if (BytesAdded)
1698  *BytesAdded = 4;
1699  return 1;
1700  }
1701 
1702  assert(TBB && FBB);
1703 
1704  MachineInstr *CondBr =
1705  BuildMI(&MBB, DL, get(Opcode))
1706  .addMBB(TBB);
1707  BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
1708  .addMBB(FBB);
1709 
1710  MachineOperand &CondReg = CondBr->getOperand(1);
1711  CondReg.setIsUndef(Cond[1].isUndef());
1712  CondReg.setIsKill(Cond[1].isKill());
1713 
1714  if (BytesAdded)
1715  *BytesAdded = 8;
1716 
1717  return 2;
1718 }
1719 
1720 bool SIInstrInfo::reverseBranchCondition(
1721  SmallVectorImpl<MachineOperand> &Cond) const {
1722  if (Cond.size() != 2) {
1723  return true;
1724  }
1725 
1726  if (Cond[0].isImm()) {
1727  Cond[0].setImm(-Cond[0].getImm());
1728  return false;
1729  }
1730 
1731  return true;
1732 }
1733 
1734 bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
1735  ArrayRef<MachineOperand> Cond,
1736  unsigned TrueReg, unsigned FalseReg,
1737  int &CondCycles,
1738  int &TrueCycles, int &FalseCycles) const {
1739  switch (Cond[0].getImm()) {
1740  case VCCNZ:
1741  case VCCZ: {
1742  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1743  const TargetRegisterClass *RC = MRI.getRegClass(TrueReg);
1744  assert(MRI.getRegClass(FalseReg) == RC);
1745 
1746  int NumInsts = AMDGPU::getRegBitWidth(RC->getID()) / 32;
1747  CondCycles = TrueCycles = FalseCycles = NumInsts; // ???
1748 
1749  // Limit to equal cost for branch vs. N v_cndmask_b32s.
1750  return !RI.isSGPRClass(RC) && NumInsts <= 6;
1751  }
1752  case SCC_TRUE:
1753  case SCC_FALSE: {
1754  // FIXME: We could insert for VGPRs if we could replace the original compare
1755  // with a vector one.
1756  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1757  const TargetRegisterClass *RC = MRI.getRegClass(TrueReg);
1758  assert(MRI.getRegClass(FalseReg) == RC);
1759 
1760  int NumInsts = AMDGPU::getRegBitWidth(RC->getID()) / 32;
1761 
1762  // Multiples of 8 bytes can use s_cselect_b64.
1763  if (NumInsts % 2 == 0)
1764  NumInsts /= 2;
1765 
1766  CondCycles = TrueCycles = FalseCycles = NumInsts; // ???
1767  return RI.isSGPRClass(RC);
1768  }
1769  default:
1770  return false;
1771  }
1772 }
1773 
1774 void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
1775  MachineBasicBlock::iterator I, const DebugLoc &DL,
1776  unsigned DstReg, ArrayRef<MachineOperand> Cond,
1777  unsigned TrueReg, unsigned FalseReg) const {
1778  BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
1779  if (Pred == VCCZ || Pred == SCC_FALSE) {
1780  Pred = static_cast<BranchPredicate>(-Pred);
1781  std::swap(TrueReg, FalseReg);
1782  }
1783 
1784  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1785  const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg);
1786  unsigned DstSize = RI.getRegSizeInBits(*DstRC);
1787 
1788  if (DstSize == 32) {
1789  unsigned SelOp = Pred == SCC_TRUE ?
1790  AMDGPU::S_CSELECT_B32 : AMDGPU::V_CNDMASK_B32_e32;
1791 
1792  // Instruction's operands are backwards from what is expected.
1793  MachineInstr *Select =
1794  BuildMI(MBB, I, DL, get(SelOp), DstReg)
1795  .addReg(FalseReg)
1796  .addReg(TrueReg);
1797 
1798  preserveCondRegFlags(Select->getOperand(3), Cond[1]);
1799  return;
1800  }
1801 
1802  if (DstSize == 64 && Pred == SCC_TRUE) {
1803  MachineInstr *Select =
1804  BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), DstReg)
1805  .addReg(FalseReg)
1806  .addReg(TrueReg);
1807 
1808  preserveCondRegFlags(Select->getOperand(3), Cond[1]);
1809  return;
1810  }
1811 
1812  static const int16_t Sub0_15[] = {
1813  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1814  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1815  AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1816  AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1817  };
1818 
1819  static const int16_t Sub0_15_64[] = {
1820  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1821  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1822  AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1823  AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
1824  };
1825 
1826  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
1827  const TargetRegisterClass *EltRC = &AMDGPU::VGPR_32RegClass;
1828  const int16_t *SubIndices = Sub0_15;
1829  int NElts = DstSize / 32;
1830 
1831  // 64-bit select is only available for SALU.
1832  if (Pred == SCC_TRUE) {
1833  SelOp = AMDGPU::S_CSELECT_B64;
1834  EltRC = &AMDGPU::SGPR_64RegClass;
1835  SubIndices = Sub0_15_64;
1836 
1837  assert(NElts % 2 == 0);
1838  NElts /= 2;
1839  }
1840 
1841  MachineInstrBuilder MIB = BuildMI(
1842  MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);
1843 
1844  I = MIB->getIterator();
1845 
1846  SmallVector<unsigned, 8> Regs;
1847  for (int Idx = 0; Idx != NElts; ++Idx) {
1848  unsigned DstElt = MRI.createVirtualRegister(EltRC);
1849  Regs.push_back(DstElt);
1850 
1851  unsigned SubIdx = SubIndices[Idx];
1852 
1853  MachineInstr *Select =
1854  BuildMI(MBB, I, DL, get(SelOp), DstElt)
1855  .addReg(FalseReg, 0, SubIdx)
1856  .addReg(TrueReg, 0, SubIdx);
1857  preserveCondRegFlags(Select->getOperand(3), Cond[1]);
1858 
1859  MIB.addReg(DstElt)
1860  .addImm(SubIdx);
1861  }
1862 }
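 // Illustrative sketch (added; not part of the original source). For a
 // 128-bit VGPR select with a VCC condition, the loop above emits one
 // v_cndmask_b32 per 32-bit element and recombines the pieces, roughly:
 //
 //   %elt0 = V_CNDMASK_B32_e32 %false:sub0, %true:sub0, implicit $vcc
 //   %elt1 = V_CNDMASK_B32_e32 %false:sub1, %true:sub1, implicit $vcc
 //   ...
 //   %dst  = REG_SEQUENCE %elt0, sub0, %elt1, sub1, ...
 //
 // The register names are hypothetical. With an SCC_TRUE predicate the same
 // loop uses s_cselect_b64 on 64-bit SGPR pieces instead.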
1863 
1864 bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
1865  switch (MI.getOpcode()) {
1866  case AMDGPU::V_MOV_B32_e32:
1867  case AMDGPU::V_MOV_B32_e64:
1868  case AMDGPU::V_MOV_B64_PSEUDO: {
1869  // If there are additional implicit register operands, this may be used for
1870  // register indexing so the source register operand isn't simply copied.
1871  unsigned NumOps = MI.getDesc().getNumOperands() +
1872  MI.getDesc().getNumImplicitUses();
1873 
1874  return MI.getNumOperands() == NumOps;
1875  }
1876  case AMDGPU::S_MOV_B32:
1877  case AMDGPU::S_MOV_B64:
1878  case AMDGPU::COPY:
1879  return true;
1880  default:
1881  return false;
1882  }
1883 }
1884 
1887  switch(Kind) {
1890  return AMDGPUASI.PRIVATE_ADDRESS;
1897  return AMDGPUASI.CONSTANT_ADDRESS;
1898  }
1899  return AMDGPUASI.FLAT_ADDRESS;
1900 }
1901 
1902 void SIInstrInfo::removeModOperands(MachineInstr &MI) const {
1903  unsigned Opc = MI.getOpcode();
1904  int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc,
1905  AMDGPU::OpName::src0_modifiers);
1906  int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc,
1907  AMDGPU::OpName::src1_modifiers);
1908  int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc,
1909  AMDGPU::OpName::src2_modifiers);
1910 
1911  MI.RemoveOperand(Src2ModIdx);
1912  MI.RemoveOperand(Src1ModIdx);
1913  MI.RemoveOperand(Src0ModIdx);
1914 }
1915 
1916 bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
1917  unsigned Reg, MachineRegisterInfo *MRI) const {
1918  if (!MRI->hasOneNonDBGUse(Reg))
1919  return false;
1920 
1921  switch (DefMI.getOpcode()) {
1922  default:
1923  return false;
1924  case AMDGPU::S_MOV_B64:
1925  // TODO: We could fold 64-bit immediates, but this gets complicated
1926  // when there are sub-registers.
1927  return false;
1928 
1929  case AMDGPU::V_MOV_B32_e32:
1930  case AMDGPU::S_MOV_B32:
1931  break;
1932  }
1933 
1934  const MachineOperand *ImmOp = getNamedOperand(DefMI, AMDGPU::OpName::src0);
1935  assert(ImmOp);
1936  // FIXME: We could handle FrameIndex values here.
1937  if (!ImmOp->isImm())
1938  return false;
1939 
1940  unsigned Opc = UseMI.getOpcode();
1941  if (Opc == AMDGPU::COPY) {
1942  bool isVGPRCopy = RI.isVGPR(*MRI, UseMI.getOperand(0).getReg());
1943  unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
1944  UseMI.setDesc(get(NewOpc));
1945  UseMI.getOperand(1).ChangeToImmediate(ImmOp->getImm());
1946  UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent());
1947  return true;
1948  }
1949 
1950  if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64 ||
1951  Opc == AMDGPU::V_MAD_F16 || Opc == AMDGPU::V_MAC_F16_e64) {
1952  // Don't fold if we are using source or output modifiers. The new VOP2
1953  // instructions don't have them.
1954  if (hasAnyModifiersSet(UseMI))
1955  return false;
1956 
1957  // If this is a free constant, there's no reason to do this.
1958  // TODO: We could fold this here instead of letting SIFoldOperands do it
1959  // later.
1960  MachineOperand *Src0 = getNamedOperand(UseMI, AMDGPU::OpName::src0);
1961 
1962  // Any src operand can be used for the legality check.
1963  if (isInlineConstant(UseMI, *Src0, *ImmOp))
1964  return false;
1965 
1966  bool IsF32 = Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64;
1967  MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1);
1968  MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2);
1969 
1970  // Multiplied part is the constant: Use v_madmk_{f16, f32}.
1971  // We should only expect these to be on src0 due to canonicalizations.
1972  if (Src0->isReg() && Src0->getReg() == Reg) {
1973  if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))
1974  return false;
1975 
1976  if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))
1977  return false;
1978 
1979  // We need to swap operands 0 and 1 since madmk constant is at operand 1.
1980 
1981  const int64_t Imm = ImmOp->getImm();
1982 
1983  // FIXME: This would be a lot easier if we could return a new instruction
1984  // instead of having to modify in place.
1985 
1986  // Remove these first since they are at the end.
1987  UseMI.RemoveOperand(
1988  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod));
1989  UseMI.RemoveOperand(
1990  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp));
1991 
1992  unsigned Src1Reg = Src1->getReg();
1993  unsigned Src1SubReg = Src1->getSubReg();
1994  Src0->setReg(Src1Reg);
1995  Src0->setSubReg(Src1SubReg);
1996  Src0->setIsKill(Src1->isKill());
1997 
1998  if (Opc == AMDGPU::V_MAC_F32_e64 ||
1999  Opc == AMDGPU::V_MAC_F16_e64)
2000  UseMI.untieRegOperand(
2001  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
2002 
2003  Src1->ChangeToImmediate(Imm);
2004 
2005  removeModOperands(UseMI);
2006  UseMI.setDesc(get(IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16));
2007 
2008  bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
2009  if (DeleteDef)
2010  DefMI.eraseFromParent();
2011 
2012  return true;
2013  }
2014 
2015  // Added part is the constant: Use v_madak_{f16, f32}.
2016  if (Src2->isReg() && Src2->getReg() == Reg) {
2017  // Not allowed to use constant bus for another operand.
2018  // We can however allow an inline immediate as src0.
2019  if (!Src0->isImm() &&
2020  (Src0->isReg() && RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))
2021  return false;
2022 
2023  if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))
2024  return false;
2025 
2026  const int64_t Imm = ImmOp->getImm();
2027 
2028  // FIXME: This would be a lot easier if we could return a new instruction
2029  // instead of having to modify in place.
2030 
2031  // Remove these first since they are at the end.
2032  UseMI.RemoveOperand(
2033  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod));
2034  UseMI.RemoveOperand(
2035  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp));
2036 
2037  if (Opc == AMDGPU::V_MAC_F32_e64 ||
2038  Opc == AMDGPU::V_MAC_F16_e64)
2039  UseMI.untieRegOperand(
2040  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
2041 
2042  // ChangingToImmediate adds Src2 back to the instruction.
2043  Src2->ChangeToImmediate(Imm);
2044 
2045  // These come before src2.
2046  removeModOperands(UseMI);
2047  UseMI.setDesc(get(IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16));
2048 
2049  bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
2050  if (DeleteDef)
2051  DefMI.eraseFromParent();
2052 
2053  return true;
2054  }
2055  }
2056 
2057  return false;
2058 }
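 // Illustrative sketch (added; not part of the original source). With a
 // single non-debug use of the moved immediate, the folding above turns e.g.
 //
 //   %k = V_MOV_B32_e32 42                          ; DefMI
 //   %d = V_MAD_F32 0, %k, 0, %a, 0, %b, 0, 0       ; UseMI, %k on src0
 // into
 //   %d = V_MADMK_F32 %a, 42, %b
 //
 // (operand/modifier layout abbreviated; the v_madak form is used when the
 // folded register feeds src2, and DefMI is erased once its last use is gone).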
2059 
2060 static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
2061  int WidthB, int OffsetB) {
2062  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
2063  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
2064  int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
2065  return LowOffset + LowWidth <= HighOffset;
2066 }
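 // Worked example (added for clarity): WidthA = 4 at OffsetA = 0 and
 // WidthB = 8 at OffsetB = 4 do not overlap (0 + 4 <= 4), whereas WidthA = 8
 // at OffsetA = 0 and WidthB = 4 at OffsetB = 4 do overlap (0 + 8 > 4).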
2067 
2068 bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr &MIa,
2069  MachineInstr &MIb) const {
2070  unsigned BaseReg0, BaseReg1;
2071  int64_t Offset0, Offset1;
2072 
2073  if (getMemOpBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) &&
2074  getMemOpBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) {
2075 
2076  if (!MIa.hasOneMemOperand() || !MIb.hasOneMemOperand()) {
2077  // FIXME: Handle ds_read2 / ds_write2.
2078  return false;
2079  }
2080  unsigned Width0 = (*MIa.memoperands_begin())->getSize();
2081  unsigned Width1 = (*MIb.memoperands_begin())->getSize();
2082  if (BaseReg0 == BaseReg1 &&
2083  offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) {
2084  return true;
2085  }
2086  }
2087 
2088  return false;
2089 }
2090 
2091 bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr &MIa,
2092  MachineInstr &MIb,
2093  AliasAnalysis *AA) const {
2094  assert((MIa.mayLoad() || MIa.mayStore()) &&
2095  "MIa must load from or modify a memory location");
2096  assert((MIb.mayLoad() || MIb.mayStore()) &&
2097  "MIb must load from or modify a memory location");
2098 
2099  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects())
2100  return false;
2101 
2102  // XXX - Can we relax this between address spaces?
2103  if (MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
2104  return false;
2105 
2106  if (AA && MIa.hasOneMemOperand() && MIb.hasOneMemOperand()) {
2107  const MachineMemOperand *MMOa = *MIa.memoperands_begin();
2108  const MachineMemOperand *MMOb = *MIb.memoperands_begin();
2109  if (MMOa->getValue() && MMOb->getValue()) {
2110  MemoryLocation LocA(MMOa->getValue(), MMOa->getSize(), MMOa->getAAInfo());
2111  MemoryLocation LocB(MMOb->getValue(), MMOb->getSize(), MMOb->getAAInfo());
2112  if (!AA->alias(LocA, LocB))
2113  return true;
2114  }
2115  }
2116 
2117  // TODO: Should we check the address space from the MachineMemOperand? That
2118  // would allow us to distinguish objects we know don't alias based on the
2119  // underlying address space, even if it was lowered to a different one,
2120  // e.g. private accesses lowered to use MUBUF instructions on a scratch
2121  // buffer.
2122  if (isDS(MIa)) {
2123  if (isDS(MIb))
2124  return checkInstOffsetsDoNotOverlap(MIa, MIb);
2125 
2126  return !isFLAT(MIb) || isSegmentSpecificFLAT(MIb);
2127  }
2128 
2129  if (isMUBUF(MIa) || isMTBUF(MIa)) {
2130  if (isMUBUF(MIb) || isMTBUF(MIb))
2131  return checkInstOffsetsDoNotOverlap(MIa, MIb);
2132 
2133  return !isFLAT(MIb) && !isSMRD(MIb);
2134  }
2135 
2136  if (isSMRD(MIa)) {
2137  if (isSMRD(MIb))
2138  return checkInstOffsetsDoNotOverlap(MIa, MIb);
2139 
2140  return !isFLAT(MIb) && !isMUBUF(MIa) && !isMTBUF(MIa);
2141  }
2142 
2143  if (isFLAT(MIa)) {
2144  if (isFLAT(MIb))
2145  return checkInstOffsetsDoNotOverlap(MIa, MIb);
2146 
2147  return false;
2148  }
2149 
2150  return false;
2151 }
2152 
2153 static int64_t getFoldableImm(const MachineOperand* MO) {
2154  if (!MO->isReg())
2155  return false;
2156  const MachineFunction *MF = MO->getParent()->getParent()->getParent();
2157  const MachineRegisterInfo &MRI = MF->getRegInfo();
2158  auto Def = MRI.getUniqueVRegDef(MO->getReg());
2159  if (Def && Def->getOpcode() == AMDGPU::V_MOV_B32_e32 &&
2160  Def->getOperand(1).isImm())
2161  return Def->getOperand(1).getImm();
2162  return AMDGPU::NoRegister;
2163 }
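 // Note (added): both `false` and AMDGPU::NoRegister evaluate to 0, so this
 // helper effectively returns 0 when no foldable immediate is found. Callers
 // such as convertToThreeAddress below test the result for non-zero, which
 // also means a materialized literal 0 is never treated as foldable here.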
2164 
2165 MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
2166  MachineInstr &MI,
2167  LiveVariables *LV) const {
2168  bool IsF16 = false;
2169 
2170  switch (MI.getOpcode()) {
2171  default:
2172  return nullptr;
2173  case AMDGPU::V_MAC_F16_e64:
2174  IsF16 = true;
2175  LLVM_FALLTHROUGH;
2176  case AMDGPU::V_MAC_F32_e64:
2177  break;
2178  case AMDGPU::V_MAC_F16_e32:
2179  IsF16 = true;
2180  LLVM_FALLTHROUGH;
2181  case AMDGPU::V_MAC_F32_e32: {
2182  int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
2183  AMDGPU::OpName::src0);
2184  const MachineOperand *Src0 = &MI.getOperand(Src0Idx);
2185  if (!Src0->isReg() && !Src0->isImm())
2186  return nullptr;
2187 
2188  if (Src0->isImm() && !isInlineConstant(MI, Src0Idx, *Src0))
2189  return nullptr;
2190 
2191  break;
2192  }
2193  }
2194 
2195  const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst);
2196  const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0);
2197  const MachineOperand *Src0Mods =
2198  getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
2199  const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
2200  const MachineOperand *Src1Mods =
2201  getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
2202  const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
2203  const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
2204  const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
2205 
2206  if (!Src0Mods && !Src1Mods && !Clamp && !Omod &&
2207  // If we have an SGPR input, we will violate the constant bus restriction.
2208  (!Src0->isReg() || !RI.isSGPRReg(MBB->getParent()->getRegInfo(), Src0->getReg()))) {
2209  if (auto Imm = getFoldableImm(Src2)) {
2210  return BuildMI(*MBB, MI, MI.getDebugLoc(),
2211  get(IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32))
2212  .add(*Dst)
2213  .add(*Src0)
2214  .add(*Src1)
2215  .addImm(Imm);
2216  }
2217  if (auto Imm = getFoldableImm(Src1)) {
2218  return BuildMI(*MBB, MI, MI.getDebugLoc(),
2219  get(IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32))
2220  .add(*Dst)
2221  .add(*Src0)
2222  .addImm(Imm)
2223  .add(*Src2);
2224  }
2225  if (auto Imm = getFoldableImm(Src0)) {
2226  if (isOperandLegal(MI, AMDGPU::getNamedOperandIdx(AMDGPU::V_MADMK_F32,
2227  AMDGPU::OpName::src0), Src1))
2228  return BuildMI(*MBB, MI, MI.getDebugLoc(),
2229  get(IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32))
2230  .add(*Dst)
2231  .add(*Src1)
2232  .addImm(Imm)
2233  .add(*Src2);
2234  }
2235  }
2236 
2237  return BuildMI(*MBB, MI, MI.getDebugLoc(),
2238  get(IsF16 ? AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32))
2239  .add(*Dst)
2240  .addImm(Src0Mods ? Src0Mods->getImm() : 0)
2241  .add(*Src0)
2242  .addImm(Src1Mods ? Src1Mods->getImm() : 0)
2243  .add(*Src1)
2244  .addImm(0) // Src mods
2245  .add(*Src2)
2246  .addImm(Clamp ? Clamp->getImm() : 0)
2247  .addImm(Omod ? Omod->getImm() : 0);
2248 }
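 // Illustrative sketch (added; not part of the original source). A
 // two-address MAC is rewritten to its three-address MAD form, e.g.
 //
 //   %d = V_MAC_F32_e32 %a, %b, %d(tied)
 // becomes
 //   %d = V_MAD_F32 0, %a, 0, %b, 0, %d, 0, 0
 //
 // and when one multiplicand or the addend is defined by a foldable
 // V_MOV_B32 immediate, the v_madmk / v_madak paths above are taken instead.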
2249 
2250 // It's not generally safe to move VALU instructions across these since it will
2251 // start using the register as a base index rather than directly.
2252 // XXX - Why isn't hasSideEffects sufficient for these?
2253 static bool changesVGPRIndexingMode(const MachineInstr &MI) {
2254  switch (MI.getOpcode()) {
2255  case AMDGPU::S_SET_GPR_IDX_ON:
2256  case AMDGPU::S_SET_GPR_IDX_MODE:
2257  case AMDGPU::S_SET_GPR_IDX_OFF:
2258  return true;
2259  default:
2260  return false;
2261  }
2262 }
2263 
2264 bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
2265  const MachineBasicBlock *MBB,
2266  const MachineFunction &MF) const {
2267  // XXX - Do we want the SP check in the base implementation?
2268 
2269  // Target-independent instructions do not have an implicit-use of EXEC, even
2270  // when they operate on VGPRs. Treating EXEC modifications as scheduling
2271  // boundaries prevents incorrect movements of such instructions.
2272  return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF) ||
2273  MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
2274  MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
2275  MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
2276  changesVGPRIndexingMode(MI);
2277 }
2278 
2279 bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
2280  switch (Imm.getBitWidth()) {
2281  case 32:
2283  ST.hasInv2PiInlineImm());
2284  case 64:
2286  ST.hasInv2PiInlineImm());
2287  case 16:
2288  return ST.has16BitInsts() &&
2290  ST.hasInv2PiInlineImm());
2291  default:
2292  llvm_unreachable("invalid bitwidth");
2293  }
2294 }
2295 
2296 bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
2297  uint8_t OperandType) const {
2298  if (!MO.isImm() ||
2299  OperandType < AMDGPU::OPERAND_SRC_FIRST ||
2300  OperandType > AMDGPU::OPERAND_SRC_LAST)
2301  return false;
2302 
2303  // MachineOperand provides no way to tell the true operand size, since it only
2304  // records a 64-bit value. We need to know the size to determine if a 32-bit
2305  // floating point immediate bit pattern is legal for an integer immediate. It
2306  // would be for any 32-bit integer operand, but would not be for a 64-bit one.
2307 
2308  int64_t Imm = MO.getImm();
2309  switch (OperandType) {
2314  int32_t Trunc = static_cast<int32_t>(Imm);
2315  return Trunc == Imm &&
2317  }
2323  ST.hasInv2PiInlineImm());
2328  if (isInt<16>(Imm) || isUInt<16>(Imm)) {
2329  // A few special case instructions have 16-bit operands on subtargets
2330  // where 16-bit instructions are not legal.
2331  // TODO: Do the 32-bit immediates work? We shouldn't really need to handle
2332  // constants in these cases
2333  int16_t Trunc = static_cast<int16_t>(Imm);
2334  return ST.has16BitInsts() &&
2336  }
2337 
2338  return false;
2339  }
2342  uint32_t Trunc = static_cast<uint32_t>(Imm);
2344  }
2345  default:
2346  llvm_unreachable("invalid bitwidth");
2347  }
2348 }
2349 
2351  const MCOperandInfo &OpInfo) const {
2352  switch (MO.getType()) {
2354  return false;
2356  return !isInlineConstant(MO, OpInfo);
2362  return true;
2363  default:
2364  llvm_unreachable("unexpected operand type");
2365  }
2366 }
2367 
2368 static bool compareMachineOp(const MachineOperand &Op0,
2369  const MachineOperand &Op1) {
2370  if (Op0.getType() != Op1.getType())
2371  return false;
2372 
2373  switch (Op0.getType()) {
2375  return Op0.getReg() == Op1.getReg();
2377  return Op0.getImm() == Op1.getImm();
2378  default:
2379  llvm_unreachable("Didn't expect to be comparing these operand types");
2380  }
2381 }
2382 
2383 bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
2384  const MachineOperand &MO) const {
2385  const MCOperandInfo &OpInfo = get(MI.getOpcode()).OpInfo[OpNo];
2386 
2387  assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
2388 
2389  if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE)
2390  return true;
2391 
2392  if (OpInfo.RegClass < 0)
2393  return false;
2394 
2395  if (MO.isImm() && isInlineConstant(MO, OpInfo))
2396  return RI.opCanUseInlineConstant(OpInfo.OperandType);
2397 
2398  return RI.opCanUseLiteralConstant(OpInfo.OperandType);
2399 }
2400 
2401 bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
2402  int Op32 = AMDGPU::getVOPe32(Opcode);
2403  if (Op32 == -1)
2404  return false;
2405 
2406  return pseudoToMCOpcode(Op32) != -1;
2407 }
2408 
2409 bool SIInstrInfo::hasModifiers(unsigned Opcode) const {
2410  // The src0_modifier operand is present on all instructions
2411  // that have modifiers.
2412 
2413  return AMDGPU::getNamedOperandIdx(Opcode,
2414  AMDGPU::OpName::src0_modifiers) != -1;
2415 }
2416 
2417 bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI,
2418  unsigned OpName) const {
2419  const MachineOperand *Mods = getNamedOperand(MI, OpName);
2420  return Mods && Mods->getImm();
2421 }
2422 
2423 bool SIInstrInfo::hasAnyModifiersSet(const MachineInstr &MI) const {
2424  return hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
2425  hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
2426  hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers) ||
2427  hasModifiersSet(MI, AMDGPU::OpName::clamp) ||
2428  hasModifiersSet(MI, AMDGPU::OpName::omod);
2429 }
2430 
2431 bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
2432  const MachineOperand &MO,
2433  const MCOperandInfo &OpInfo) const {
2434  // Literal constants use the constant bus.
2435  //if (isLiteralConstantLike(MO, OpInfo))
2436  // return true;
2437  if (MO.isImm())
2438  return !isInlineConstant(MO, OpInfo);
2439 
2440  if (!MO.isReg())
2441  return true; // Misc other operands like FrameIndex
2442 
2443  if (!MO.isUse())
2444  return false;
2445 
2446  if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
2447  return RI.isSGPRClass(MRI.getRegClass(MO.getReg()));
2448 
2449  // FLAT_SCR is just an SGPR pair.
2450  if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR))
2451  return true;
2452 
2453  // EXEC register uses the constant bus.
2454  if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
2455  return true;
2456 
2457  // SGPRs use the constant bus
2458  return (MO.getReg() == AMDGPU::VCC || MO.getReg() == AMDGPU::M0 ||
2459  (!MO.isImplicit() &&
2460  (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
2461  AMDGPU::SGPR_64RegClass.contains(MO.getReg()))));
2462 }
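 // Illustrative note (added): an SGPR source, m0, vcc, exec, flat_scratch or
 // a non-inline 32-bit literal each occupy the single constant bus slot,
 // while VGPR sources and inline constants do not. verifyInstruction() below
 // rejects VOP* instructions that would need the bus more than once.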
2463 
2464 static unsigned findImplicitSGPRRead(const MachineInstr &MI) {
2465  for (const MachineOperand &MO : MI.implicit_operands()) {
2466  // We only care about reads.
2467  if (MO.isDef())
2468  continue;
2469 
2470  switch (MO.getReg()) {
2471  case AMDGPU::VCC:
2472  case AMDGPU::M0:
2473  case AMDGPU::FLAT_SCR:
2474  return MO.getReg();
2475 
2476  default:
2477  break;
2478  }
2479  }
2480 
2481  return AMDGPU::NoRegister;
2482 }
2483 
2484 static bool shouldReadExec(const MachineInstr &MI) {
2485  if (SIInstrInfo::isVALU(MI)) {
2486  switch (MI.getOpcode()) {
2487  case AMDGPU::V_READLANE_B32:
2488  case AMDGPU::V_READLANE_B32_si:
2489  case AMDGPU::V_READLANE_B32_vi:
2490  case AMDGPU::V_WRITELANE_B32:
2491  case AMDGPU::V_WRITELANE_B32_si:
2492  case AMDGPU::V_WRITELANE_B32_vi:
2493  return false;
2494  }
2495 
2496  return true;
2497  }
2498 
2499  if (SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||
2500  SIInstrInfo::isSALU(MI) ||
2501  SIInstrInfo::isSMRD(MI))
2502  return false;
2503 
2504  return true;
2505 }
2506 
2507 static bool isSubRegOf(const SIRegisterInfo &TRI,
2508  const MachineOperand &SuperVec,
2509  const MachineOperand &SubReg) {
2510  if (TargetRegisterInfo::isPhysicalRegister(SubReg.getReg()))
2511  return TRI.isSubRegister(SuperVec.getReg(), SubReg.getReg());
2512 
2513  return SubReg.getSubReg() != AMDGPU::NoSubRegister &&
2514  SubReg.getReg() == SuperVec.getReg();
2515 }
2516 
2517 bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
2518  StringRef &ErrInfo) const {
2519  uint16_t Opcode = MI.getOpcode();
2520  if (SIInstrInfo::isGenericOpcode(MI.getOpcode()))
2521  return true;
2522 
2523  const MachineFunction *MF = MI.getParent()->getParent();
2524  const MachineRegisterInfo &MRI = MF->getRegInfo();
2525 
2526  int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2527  int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2528  int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2529 
2530  // Make sure the number of operands is correct.
2531  const MCInstrDesc &Desc = get(Opcode);
2532  if (!Desc.isVariadic() &&
2533  Desc.getNumOperands() != MI.getNumExplicitOperands()) {
2534  ErrInfo = "Instruction has wrong number of operands.";
2535  return false;
2536  }
2537 
2538  if (MI.isInlineAsm()) {
2539  // Verify register classes for inlineasm constraints.
2540  for (unsigned I = InlineAsm::MIOp_FirstOperand, E = MI.getNumOperands();
2541  I != E; ++I) {
2542  const TargetRegisterClass *RC = MI.getRegClassConstraint(I, this, &RI);
2543  if (!RC)
2544  continue;
2545 
2546  const MachineOperand &Op = MI.getOperand(I);
2547  if (!Op.isReg())
2548  continue;
2549 
2550  unsigned Reg = Op.getReg();
2551  if (!TargetRegisterInfo::isVirtualRegister(Reg) && !RC->contains(Reg)) {
2552  ErrInfo = "inlineasm operand has incorrect register class.";
2553  return false;
2554  }
2555  }
2556 
2557  return true;
2558  }
2559 
2560  // Make sure the register classes are correct.
2561  for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
2562  if (MI.getOperand(i).isFPImm()) {
2563  ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
2564  "all fp values to integers.";
2565  return false;
2566  }
2567 
2568  int RegClass = Desc.OpInfo[i].RegClass;
2569 
2570  switch (Desc.OpInfo[i].OperandType) {
2572  if (MI.getOperand(i).isImm()) {
2573  ErrInfo = "Illegal immediate value for operand.";
2574  return false;
2575  }
2576  break;
2579  break;
2586  const MachineOperand &MO = MI.getOperand(i);
2587  if (!MO.isReg() && (!MO.isImm() || !isInlineConstant(MI, i))) {
2588  ErrInfo = "Illegal immediate value for operand.";
2589  return false;
2590  }
2591  break;
2592  }
2595  // Check if this operand is an immediate.
2596  // FrameIndex operands will be replaced by immediates, so they are
2597  // allowed.
2598  if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) {
2599  ErrInfo = "Expected immediate, but got non-immediate";
2600  return false;
2601  }
2603  default:
2604  continue;
2605  }
2606 
2607  if (!MI.getOperand(i).isReg())
2608  continue;
2609 
2610  if (RegClass != -1) {
2611  unsigned Reg = MI.getOperand(i).getReg();
2612  if (Reg == AMDGPU::NoRegister ||
2614  continue;
2615 
2616  const TargetRegisterClass *RC = RI.getRegClass(RegClass);
2617  if (!RC->contains(Reg)) {
2618  ErrInfo = "Operand has incorrect register class.";
2619  return false;
2620  }
2621  }
2622  }
2623 
2624  // Verify SDWA
2625  if (isSDWA(MI)) {
2626  if (!ST.hasSDWA()) {
2627  ErrInfo = "SDWA is not supported on this target";
2628  return false;
2629  }
2630 
2631  int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2632 
2633  const int OpIndicies[] = { DstIdx, Src0Idx, Src1Idx, Src2Idx };
2634 
2635  for (int OpIdx: OpIndicies) {
2636  if (OpIdx == -1)
2637  continue;
2638  const MachineOperand &MO = MI.getOperand(OpIdx);
2639 
2640  if (!ST.hasSDWAScalar()) {
2641  // Only VGPRs on VI
2642  if (!MO.isReg() || !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.getReg()))) {
2643  ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI";
2644  return false;
2645  }
2646  } else {
2647  // No immediates on GFX9
2648  if (!MO.isReg()) {
2649  ErrInfo = "Only reg allowed as operands in SDWA instructions on GFX9";
2650  return false;
2651  }
2652  }
2653  }
2654 
2655  if (!ST.hasSDWAOmod()) {
2656  // No omod allowed on VI
2657  const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod);
2658  if (OMod != nullptr &&
2659  (!OMod->isImm() || OMod->getImm() != 0)) {
2660  ErrInfo = "OMod not allowed in SDWA instructions on VI";
2661  return false;
2662  }
2663  }
2664 
2665  uint16_t BasicOpcode = AMDGPU::getBasicFromSDWAOp(Opcode);
2666  if (isVOPC(BasicOpcode)) {
2667  if (!ST.hasSDWASdst() && DstIdx != -1) {
2668  // Only vcc allowed as dst on VI for VOPC
2669  const MachineOperand &Dst = MI.getOperand(DstIdx);
2670  if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
2671  ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";
2672  return false;
2673  }
2674  } else if (!ST.hasSDWAOutModsVOPC()) {
2675  // No clamp allowed on GFX9 for VOPC
2676  const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
2677  if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
2678  ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";
2679  return false;
2680  }
2681 
2682  // No omod allowed on GFX9 for VOPC
2683  const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod);
2684  if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
2685  ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";
2686  return false;
2687  }
2688  }
2689  }
2690 
2691  const MachineOperand *DstUnused = getNamedOperand(MI, AMDGPU::OpName::dst_unused);
2692  if (DstUnused && DstUnused->isImm() &&
2693  DstUnused->getImm() == AMDGPU::SDWA::UNUSED_PRESERVE) {
2694  const MachineOperand &Dst = MI.getOperand(DstIdx);
2695  if (!Dst.isReg() || !Dst.isTied()) {
2696  ErrInfo = "Dst register should have tied register";
2697  return false;
2698  }
2699 
2700  const MachineOperand &TiedMO =
2701  MI.getOperand(MI.findTiedOperandIdx(DstIdx));
2702  if (!TiedMO.isReg() || !TiedMO.isImplicit() || !TiedMO.isUse()) {
2703  ErrInfo =
2704  "Dst register should be tied to implicit use of preserved register";
2705  return false;
2706  } else if (TargetRegisterInfo::isPhysicalRegister(TiedMO.getReg()) &&
2707  Dst.getReg() != TiedMO.getReg()) {
2708  ErrInfo = "Dst register should use same physical register as preserved";
2709  return false;
2710  }
2711  }
2712  }
2713 
2714  // Verify VOP*
2715  if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isVOPC(MI) || isSDWA(MI)) {
2716  // Only look at the true operands. Only a real operand can use the constant
2717  // bus, and we don't want to check pseudo-operands like the source modifier
2718  // flags.
2719  const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2720 
2721  unsigned ConstantBusCount = 0;
2722 
2723  if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1)
2724  ++ConstantBusCount;
2725 
2726  unsigned SGPRUsed = findImplicitSGPRRead(MI);
2727  if (SGPRUsed != AMDGPU::NoRegister)
2728  ++ConstantBusCount;
2729 
2730  for (int OpIdx : OpIndices) {
2731  if (OpIdx == -1)
2732  break;
2733  const MachineOperand &MO = MI.getOperand(OpIdx);
2734  if (usesConstantBus(MRI, MO, MI.getDesc().OpInfo[OpIdx])) {
2735  if (MO.isReg()) {
2736  if (MO.getReg() != SGPRUsed)
2737  ++ConstantBusCount;
2738  SGPRUsed = MO.getReg();
2739  } else {
2740  ++ConstantBusCount;
2741  }
2742  }
2743  }
2744  if (ConstantBusCount > 1) {
2745  ErrInfo = "VOP* instruction uses the constant bus more than once";
2746  return false;
2747  }
2748  }
2749 
2750  // Verify misc. restrictions on specific instructions.
2751  if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
2752  Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
2753  const MachineOperand &Src0 = MI.getOperand(Src0Idx);
2754  const MachineOperand &Src1 = MI.getOperand(Src1Idx);
2755  const MachineOperand &Src2 = MI.getOperand(Src2Idx);
2756  if (Src0.isReg() && Src1.isReg() && Src2.isReg()) {
2757  if (!compareMachineOp(Src0, Src1) &&
2758  !compareMachineOp(Src0, Src2)) {
2759  ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
2760  return false;
2761  }
2762  }
2763  }
2764 
2765  if (isSOPK(MI)) {
2766  int64_t Imm = getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm();
2767  if (sopkIsZext(MI)) {
2768  if (!isUInt<16>(Imm)) {
2769  ErrInfo = "invalid immediate for SOPK instruction";
2770  return false;
2771  }
2772  } else {
2773  if (!isInt<16>(Imm)) {
2774  ErrInfo = "invalid immediate for SOPK instruction";
2775  return false;
2776  }
2777  }
2778  }
2779 
2780  if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
2781  Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
2782  Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
2783  Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
2784  const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
2785  Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
2786 
2787  const unsigned StaticNumOps = Desc.getNumOperands() +
2788  Desc.getNumImplicitUses();
2789  const unsigned NumImplicitOps = IsDst ? 2 : 1;
2790 
2791  // Allow additional implicit operands. This allows a fixup done by the post
2792  // RA scheduler where the main implicit operand is killed and implicit-defs
2793  // are added for sub-registers that remain live after this instruction.
2794  if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
2795  ErrInfo = "missing implicit register operands";
2796  return false;
2797  }
2798 
2799  const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst);
2800  if (IsDst) {
2801  if (!Dst->isUse()) {
2802  ErrInfo = "v_movreld_b32 vdst should be a use operand";
2803  return false;
2804  }
2805 
2806  unsigned UseOpIdx;
2807  if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
2808  UseOpIdx != StaticNumOps + 1) {
2809  ErrInfo = "movrel implicit operands should be tied";
2810  return false;
2811  }
2812  }
2813 
2814  const MachineOperand &Src0 = MI.getOperand(Src0Idx);
2815  const MachineOperand &ImpUse
2816  = MI.getOperand(StaticNumOps + NumImplicitOps - 1);
2817  if (!ImpUse.isReg() || !ImpUse.isUse() ||
2818  !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
2819  ErrInfo = "src0 should be subreg of implicit vector use";
2820  return false;
2821  }
2822  }
2823 
2824  // Make sure we aren't losing exec uses in the td files. This mostly requires
2825  // being careful when using let Uses to try to add other use registers.
2826  if (shouldReadExec(MI)) {
2827  if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
2828  ErrInfo = "VALU instruction does not implicitly read exec mask";
2829  return false;
2830  }
2831  }
2832 
2833  if (isSMRD(MI)) {
2834  if (MI.mayStore()) {
2835  // The register offset form of scalar stores may only use m0 as the
2836  // soffset register.
2837  const MachineOperand *Soff = getNamedOperand(MI, AMDGPU::OpName::soff);
2838  if (Soff && Soff->getReg() != AMDGPU::M0) {
2839  ErrInfo = "scalar stores must use m0 as offset register";
2840  return false;
2841  }
2842  }
2843  }
2844 
2845  if (isFLAT(MI) && !MF->getSubtarget<SISubtarget>().hasFlatInstOffsets()) {
2846  const MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset);
2847  if (Offset->getImm() != 0) {
2848  ErrInfo = "subtarget does not support offsets in flat instructions";
2849  return false;
2850  }
2851  }
2852 
2853  return true;
2854 }
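 // Example of a diagnostic produced above (illustrative, added): a VOP2 add
 // reading two different SGPRs, e.g. V_ADD_F32_e32 $sgpr0, $sgpr1, trips the
 // "VOP* instruction uses the constant bus more than once" check.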
2855 
2856 unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
2857  switch (MI.getOpcode()) {
2858  default: return AMDGPU::INSTRUCTION_LIST_END;
2859  case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
2860  case AMDGPU::COPY: return AMDGPU::COPY;
2861  case AMDGPU::PHI: return AMDGPU::PHI;
2862  case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
2863  case AMDGPU::WQM: return AMDGPU::WQM;
2864  case AMDGPU::WWM: return AMDGPU::WWM;
2865  case AMDGPU::S_MOV_B32:
2866  return MI.getOperand(1).isReg() ?
2867  AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
2868  case AMDGPU::S_ADD_I32:
2869  return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_I32_e32;
2870  case AMDGPU::S_ADDC_U32:
2871  return AMDGPU::V_ADDC_U32_e32;
2872  case AMDGPU::S_SUB_I32:
2873  return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;
2874  // FIXME: These are not consistently handled, and selected when the carry is
2875  // used.
2876  case AMDGPU::S_ADD_U32:
2877  return AMDGPU::V_ADD_I32_e32;
2878  case AMDGPU::S_SUB_U32:
2879  return AMDGPU::V_SUB_I32_e32;
2880  case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
2881  case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32;
2882  case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
2883  case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
2884  case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
2885  case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64;
2886  case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64;
2887  case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64;
2888  case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64;
2889  case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
2890  case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
2891  case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
2892  case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
2893  case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
2894  case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
2895  case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32;
2896  case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
2897  case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
2898  case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
2899  case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
2900  case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
2901  case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
2902  case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
2903  case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
2904  case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
2905  case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
2906  case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32;
2907  case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32;
2908  case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32;
2909  case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e32;
2910  case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e32;
2911  case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e32;
2912  case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e32;
2913  case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e32;
2914  case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e32;
2915  case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e32;
2916  case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e32;
2917  case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
2918  case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
2919  case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
2920  case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
2921  case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ;
2922  case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ;
2923  }
2924 }
2925 
2927  unsigned OpNo) const {
2929  const MCInstrDesc &Desc = get(MI.getOpcode());
2930  if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
2931  Desc.OpInfo[OpNo].RegClass == -1) {
2932  unsigned Reg = MI.getOperand(OpNo).getReg();
2933 
2935  return MRI.getRegClass(Reg);
2936  return RI.getPhysRegClass(Reg);
2937  }
2938 
2939  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
2940  return RI.getRegClass(RCID);
2941 }
2942 
2943 bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
2944  switch (MI.getOpcode()) {
2945  case AMDGPU::COPY:
2946  case AMDGPU::REG_SEQUENCE:
2947  case AMDGPU::PHI:
2948  case AMDGPU::INSERT_SUBREG:
2949  return RI.hasVGPRs(getOpRegClass(MI, 0));
2950  default:
2951  return RI.hasVGPRs(getOpRegClass(MI, OpNo));
2952  }
2953 }
2954 
2955 void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const {
2957  MachineBasicBlock *MBB = MI.getParent();
2958  MachineOperand &MO = MI.getOperand(OpIdx);
2960  unsigned RCID = get(MI.getOpcode()).OpInfo[OpIdx].RegClass;
2961  const TargetRegisterClass *RC = RI.getRegClass(RCID);
2962  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
2963  if (MO.isReg())
2964  Opcode = AMDGPU::COPY;
2965  else if (RI.isSGPRClass(RC))
2966  Opcode = AMDGPU::S_MOV_B32;
2967 
2968  const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
2969  if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC))
2970  VRC = &AMDGPU::VReg_64RegClass;
2971  else
2972  VRC = &AMDGPU::VGPR_32RegClass;
2973 
2974  unsigned Reg = MRI.createVirtualRegister(VRC);
2975  DebugLoc DL = MBB->findDebugLoc(I);
2976  BuildMI(*MI.getParent(), I, DL, get(Opcode), Reg).add(MO);
2977  MO.ChangeToRegister(Reg, false);
2978 }
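 // Illustrative sketch (added; not part of the original source). Legalizing
 // an SGPR operand of a VALU instruction with this helper inserts a copy into
 // a fresh virtual VGPR and rewrites the operand in place, roughly:
 //
 //   %v = COPY $sgpr4      ; new VGPR_32 virtual register
 //   V_FOO ..., %v, ...    ; operand OpIdx now reads %v
 //
 // V_FOO and the register names are placeholders, not real opcodes.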
2979 
2982  MachineOperand &SuperReg,
2983  const TargetRegisterClass *SuperRC,
2984  unsigned SubIdx,
2985  const TargetRegisterClass *SubRC)
2986  const {
2987  MachineBasicBlock *MBB = MI->getParent();
2988  DebugLoc DL = MI->getDebugLoc();
2989  unsigned SubReg = MRI.createVirtualRegister(SubRC);
2990 
2991  if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) {
2992  BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
2993  .addReg(SuperReg.getReg(), 0, SubIdx);
2994  return SubReg;
2995  }
2996 
2997  // Just in case the super register is itself a sub-register, copy it to a new
2998  // value so we don't need to worry about merging its subreg index with the
2999  // SubIdx passed to this function. The register coalescer should be able to
3000  // eliminate this extra copy.
3001  unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
3002 
3003  BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg)
3004  .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg());
3005 
3006  BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
3007  .addReg(NewSuperReg, 0, SubIdx);
3008 
3009  return SubReg;
3010 }
3011 
3015  MachineOperand &Op,
3016  const TargetRegisterClass *SuperRC,
3017  unsigned SubIdx,
3018  const TargetRegisterClass *SubRC) const {
3019  if (Op.isImm()) {
3020  if (SubIdx == AMDGPU::sub0)
3021  return MachineOperand::CreateImm(static_cast<int32_t>(Op.getImm()));
3022  if (SubIdx == AMDGPU::sub1)
3023  return MachineOperand::CreateImm(static_cast<int32_t>(Op.getImm() >> 32));
3024 
3025  llvm_unreachable("Unhandled register index for immediate");
3026  }
3027 
3028  unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC,
3029  SubIdx, SubRC);
3030  return MachineOperand::CreateReg(SubReg, false);
3031 }
3032 
3033 // Change the order of operands from (0, 1, 2) to (0, 2, 1)
3034 void SIInstrInfo::swapOperands(MachineInstr &Inst) const {
3035  assert(Inst.getNumExplicitOperands() == 3);
3036  MachineOperand Op1 = Inst.getOperand(1);
3037  Inst.RemoveOperand(1);
3038  Inst.addOperand(Op1);
3039 }
3040 
3042  const MCOperandInfo &OpInfo,
3043  const MachineOperand &MO) const {
3044  if (!MO.isReg())
3045  return false;
3046 
3047  unsigned Reg = MO.getReg();
3048  const TargetRegisterClass *RC =
3050  MRI.getRegClass(Reg) :
3051  RI.getPhysRegClass(Reg);
3052 
3053  const SIRegisterInfo *TRI =
3054  static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
3055  RC = TRI->getSubRegClass(RC, MO.getSubReg());
3056 
3057  // In order to be legal, the common sub-class must be equal to the
3058  // class of the current operand. For example:
3059  //
3060  // v_mov_b32 s0 ; Operand defined as vsrc_b32
3061  // ; RI.getCommonSubClass(s0,vsrc_b32) = sgpr ; LEGAL
3062  //
3063  // s_sendmsg 0, s0 ; Operand defined as m0reg
3064  // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
3065 
3066  return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
3067 }
3068 
3070  const MCOperandInfo &OpInfo,
3071  const MachineOperand &MO) const {
3072  if (MO.isReg())
3073  return isLegalRegOperand(MRI, OpInfo, MO);
3074 
3075  // Handle non-register types that are treated like immediates.
3076  assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
3077  return true;
3078 }
3079 
3080 bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
3081  const MachineOperand *MO) const {
3083  const MCInstrDesc &InstDesc = MI.getDesc();
3084  const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
3085  const TargetRegisterClass *DefinedRC =
3086  OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr;
3087  if (!MO)
3088  MO = &MI.getOperand(OpIdx);
3089 
3090  if (isVALU(MI) && usesConstantBus(MRI, *MO, OpInfo)) {
3091 
3092  RegSubRegPair SGPRUsed;
3093  if (MO->isReg())
3094  SGPRUsed = RegSubRegPair(MO->getReg(), MO->getSubReg());
3095 
3096  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
3097  if (i == OpIdx)
3098  continue;
3099  const MachineOperand &Op = MI.getOperand(i);
3100  if (Op.isReg()) {
3101  if ((Op.getReg() != SGPRUsed.Reg || Op.getSubReg() != SGPRUsed.SubReg) &&
3102  usesConstantBus(MRI, Op, InstDesc.OpInfo[i])) {
3103  return false;
3104  }
3105  } else if (InstDesc.OpInfo[i].OperandType == AMDGPU::OPERAND_KIMM32) {
3106  return false;
3107  }
3108  }
3109  }
3110 
3111  if (MO->isReg()) {
3112  assert(DefinedRC);
3113  return isLegalRegOperand(MRI, OpInfo, *MO);
3114  }
3115 
3116  // Handle non-register types that are treated like immediates.
3117  assert(MO->isImm() || MO->isTargetIndex() || MO->isFI());
3118 
3119  if (!DefinedRC) {
3120  // This operand expects an immediate.
3121  return true;
3122  }
3123 
3124  return isImmOperandLegal(MI, OpIdx, *MO);
3125 }
3126 
3128  MachineInstr &MI) const {
3129  unsigned Opc = MI.getOpcode();
3130  const MCInstrDesc &InstrDesc = get(Opc);
3131 
3132  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
3133  MachineOperand &Src1 = MI.getOperand(Src1Idx);
3134 
3135  // If there is an implicit SGPR use such as VCC use for v_addc_u32/v_subb_u32
3136  // we need to only have one constant bus use.
3137  //
3138  // Note we do not need to worry about literal constants here. They are
3139  // disabled for the operand type for instructions because they will always
3140  // violate the one constant bus use rule.
3141  bool HasImplicitSGPR = findImplicitSGPRRead(MI) != AMDGPU::NoRegister;
3142  if (HasImplicitSGPR) {
3143  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3144  MachineOperand &Src0 = MI.getOperand(Src0Idx);
3145 
3146  if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg()))
3147  legalizeOpWithMove(MI, Src0Idx);
3148  }
3149 
3150  // VOP2 src0 instructions support all operand types, so we don't need to check
3151  // their legality. If src1 is already legal, we don't need to do anything.
3152  if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1))
3153  return;
3154 
3155  // Special case: V_READLANE_B32 accepts only immediate or SGPR operands for
3156  // lane select. Fix up using V_READFIRSTLANE, since we assume that the lane
3157  // select is uniform.
3158  if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() &&
3159  RI.isVGPR(MRI, Src1.getReg())) {
3160  unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
3161  const DebugLoc &DL = MI.getDebugLoc();
3162  BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
3163  .add(Src1);
3164  Src1.ChangeToRegister(Reg, false);
3165  return;
3166  }
3167 
3168  // We do not use commuteInstruction here because it is too aggressive and will
3169  // commute if it is possible. We only want to commute here if it improves
3170  // legality. This can be called a fairly large number of times so don't waste
3171  // compile time pointlessly swapping and checking legality again.
3172  if (HasImplicitSGPR || !MI.isCommutable()) {
3173  legalizeOpWithMove(MI, Src1Idx);
3174  return;
3175  }
3176 
3177  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3178  MachineOperand &Src0 = MI.getOperand(Src0Idx);
3179 
3180  // If src0 can be used as src1, commuting will make the operands legal.
3181  // Otherwise we have to give up and insert a move.
3182  //
3183  // TODO: Other immediate-like operand kinds could be commuted if there was a
3184  // MachineOperand::ChangeTo* for them.
3185  if ((!Src1.isImm() && !Src1.isReg()) ||
3186  !isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0)) {
3187  legalizeOpWithMove(MI, Src1Idx);
3188  return;
3189  }
3190 
3191  int CommutedOpc = commuteOpcode(MI);
3192  if (CommutedOpc == -1) {
3193  legalizeOpWithMove(MI, Src1Idx);
3194  return;
3195  }
3196 
3197  MI.setDesc(get(CommutedOpc));
3198 
3199  unsigned Src0Reg = Src0.getReg();
3200  unsigned Src0SubReg = Src0.getSubReg();
3201  bool Src0Kill = Src0.isKill();
3202 
3203  if (Src1.isImm())
3204  Src0.ChangeToImmediate(Src1.getImm());
3205  else if (Src1.isReg()) {
3206  Src0.ChangeToRegister(Src1.getReg(), false, false, Src1.isKill());
3207  Src0.setSubReg(Src1.getSubReg());
3208  } else
3209  llvm_unreachable("Should only have register or immediate operands");
3210 
3211  Src1.ChangeToRegister(Src0Reg, false, false, Src0Kill);
3212  Src1.setSubReg(Src0SubReg);
3213 }
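 // Illustrative sketch (added; not part of the original source). For a
 // commutable VOP2 with an SGPR in src1, e.g.
 //   %d = V_ADD_F32_e32 %vgpr, %sgpr
 // the code above commutes the operands so the SGPR lands in src0, which is
 // allowed to use the constant bus:
 //   %d = V_ADD_F32_e32 %sgpr, %vgpr
 // When commuting is not possible, src1 is instead copied into a VGPR via
 // legalizeOpWithMove.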
3214 
3215 // Legalize VOP3 operands. Because all operand types are supported for any
3216 // operand, and since literal constants are not allowed and should never be
3217 // seen, we only need to worry about inserting copies if we use multiple SGPR
3218 // operands.
3220  MachineInstr &MI) const {
3221  unsigned Opc = MI.getOpcode();
3222 
3223  int VOP3Idx[3] = {
3224  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
3225  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1),
3226  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)
3227  };
3228 
3229  // Find the one SGPR operand we are allowed to use.
3230  unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);
3231 
3232  for (unsigned i = 0; i < 3; ++i) {
3233  int Idx = VOP3Idx[i];
3234  if (Idx == -1)
3235  break;
3236  MachineOperand &MO = MI.getOperand(Idx);
3237 
3238  // We should never see a VOP3 instruction with an illegal immediate operand.
3239  if (!MO.isReg())
3240  continue;
3241 
3242  if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
3243  continue; // VGPRs are legal
3244 
3245  if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
3246  SGPRReg = MO.getReg();
3247  // We can use one SGPR in each VOP3 instruction.
3248  continue;
3249  }
3250 
3251  // If we make it this far, then the operand is not legal and we must
3252  // legalize it.
3253  legalizeOpWithMove(MI, Idx);
3254  }
3255 }
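 // Illustrative sketch (added; not part of the original source). A VOP3 such
 // as
 //   %d = V_MAD_F32 %sgpr0, %sgpr1, %v2    ; modifiers omitted
 // keeps one SGPR operand (a single SGPR read is allowed) while the other is
 // copied into a VGPR by legalizeOpWithMove:
 //   %v1 = COPY %sgpr1
 //   %d  = V_MAD_F32 %sgpr0, %v1, %v2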
3256 
3258  MachineRegisterInfo &MRI) const {
3259  const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg);
3260  const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC);
3261  unsigned DstReg = MRI.createVirtualRegister(SRC);
3262  unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
3263 
3265  for (unsigned i = 0; i < SubRegs; ++i) {
3266  unsigned SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
3267  BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
3268  get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
3269  .addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
3270  SRegs.push_back(SGPR);
3271  }
3272 
3273  MachineInstrBuilder MIB =
3274  BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
3275  get(AMDGPU::REG_SEQUENCE), DstReg);
3276  for (unsigned i = 0; i < SubRegs; ++i) {
3277  MIB.addReg(SRegs[i]);
3278  MIB.addImm(RI.getSubRegFromChannel(i));
3279  }
3280  return DstReg;
3281 }
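 // Illustrative sketch (added; not part of the original source). For a 64-bit
 // VGPR pointer this expands to
 //   %s0   = V_READFIRSTLANE_B32 %vptr:sub0
 //   %s1   = V_READFIRSTLANE_B32 %vptr:sub1
 //   %sptr = REG_SEQUENCE %s0, sub0, %s1, sub1
 // which legalizeOperandsSMRD below uses to rewrite a non-SGPR sbase operand.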
3282 
3284  MachineInstr &MI) const {
3285 
3286  // If the pointer is stored in VGPRs, then we need to move them to
3287  // SGPRs using v_readfirstlane. This is safe because we only select
3288  // loads with uniform pointers to SMRD instructions, so we know the
3289  // pointer value is uniform.
3290  MachineOperand *SBase = getNamedOperand(MI, AMDGPU::OpName::sbase);
3291  if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) {
3292  unsigned SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI);
3293  SBase->setReg(SGPR);
3294  }
3295 }
3296 
3299  const TargetRegisterClass *DstRC,
3300  MachineOperand &Op,
3302  const DebugLoc &DL) const {
3303  unsigned OpReg = Op.getReg();
3304  unsigned OpSubReg = Op.getSubReg();
3305 
3306  const TargetRegisterClass *OpRC = RI.getSubClassWithSubReg(
3307  RI.getRegClassForReg(MRI, OpReg), OpSubReg);
3308 
3309  // Check if operand is already the correct register class.
3310  if (DstRC == OpRC)
3311  return;
3312 
3313  unsigned DstReg = MRI.createVirtualRegister(DstRC);
3314  MachineInstr *Copy =
3315  BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg).add(Op);
3316 
3317  Op.setReg(DstReg);
3318  Op.setSubReg(0);
3319 
3320  MachineInstr *Def = MRI.getVRegDef(OpReg);
3321  if (!Def)
3322  return;
3323 
3324  // Try to eliminate the copy if it is copying an immediate value.
3325  if (Def->isMoveImmediate())
3326  FoldImmediate(*Copy, *Def, OpReg, &MRI);
3327 }
3328 
3330  MachineFunction &MF = *MI.getParent()->getParent();
3332 
3333  // Legalize VOP2
3334  if (isVOP2(MI) || isVOPC(MI)) {
3335  legalizeOperandsVOP2(MRI, MI);
3336  return;
3337  }
3338 
3339  // Legalize VOP3
3340  if (isVOP3(MI)) {
3341  legalizeOperandsVOP3(MRI, MI);
3342  return;
3343  }
3344 
3345  // Legalize SMRD
3346  if (isSMRD(MI)) {
3347  legalizeOperandsSMRD(MRI, MI);
3348  return;
3349  }
3350 
3351  // Legalize REG_SEQUENCE and PHI
3352  // The register class of the operands must be the same type as the register
3353  // class of the output.
3354  if (MI.getOpcode() == AMDGPU::PHI) {
3355  const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
3356  for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
3357  if (!MI.getOperand(i).isReg() ||
3359  continue;
3360  const TargetRegisterClass *OpRC =
3361  MRI.getRegClass(MI.getOperand(i).getReg());
3362  if (RI.hasVGPRs(OpRC)) {
3363  VRC = OpRC;
3364  } else {
3365  SRC = OpRC;
3366  }
3367  }
3368 
3369  // If any of the operands are VGPR registers, then they all must be VGPRs;
3370  // otherwise we will create illegal VGPR->SGPR copies when legalizing
3371  // them.
3372  if (VRC || !RI.isSGPRClass(getOpRegClass(MI, 0))) {
3373  if (!VRC) {
3374  assert(SRC);
3375  VRC = RI.getEquivalentVGPRClass(SRC);
3376  }
3377  RC = VRC;
3378  } else {
3379  RC = SRC;
3380  }
3381 
3382  // Update all the operands so they have the same type.
3383  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
3384  MachineOperand &Op = MI.getOperand(I);
3386  continue;
3387 
3388  // MI is a PHI instruction.
3389  MachineBasicBlock *InsertBB = MI.getOperand(I + 1).getMBB();
3390  MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator();
3391 
3392  // Avoid creating no-op copies with the same src and dst reg class. These
3393  // confuse some of the machine passes.
3394  legalizeGenericOperand(*InsertBB, Insert, RC, Op, MRI, MI.getDebugLoc());
3395  }
3396  }
3397 
3398  // REG_SEQUENCE doesn't really require operand legalization, but if one has a
3399  // VGPR dest type and SGPR sources, insert copies so all operands are
3400  // VGPRs. This seems to help operand folding / the register coalescer.
3401  if (MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
3402  MachineBasicBlock *MBB = MI.getParent();
3403  const TargetRegisterClass *DstRC = getOpRegClass(MI, 0);
3404  if (RI.hasVGPRs(DstRC)) {
3405  // Update all the operands so they are VGPR register classes. These may
3406  // not be the same register class because REG_SEQUENCE supports mixing
3407  // subregister index types e.g. sub0_sub1 + sub2 + sub3
3408  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
3409  MachineOperand &Op = MI.getOperand(I);
3411  continue;
3412 
3413  const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg());
3414  const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(OpRC);
3415  if (VRC == OpRC)
3416  continue;
3417 
3418  legalizeGenericOperand(*MBB, MI, VRC, Op, MRI, MI.getDebugLoc());
3419  Op.setIsKill();
3420  }
3421  }
3422 
3423  return;
3424  }
3425 
3426  // Legalize INSERT_SUBREG
3427  // src0 must have the same register class as dst
3428  if (MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
3429  unsigned Dst = MI.getOperand(0).getReg();
3430  unsigned Src0 = MI.getOperand(1).getReg();
3431  const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
3432  const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
3433  if (DstRC != Src0RC) {
3434  MachineBasicBlock *MBB = MI.getParent();
3435  MachineOperand &Op = MI.getOperand(1);
3436  legalizeGenericOperand(*MBB, MI, DstRC, Op, MRI, MI.getDebugLoc());
3437  }
3438  return;
3439  }
3440 
3441  // Legalize MIMG and MUBUF/MTBUF for shaders.
3442  //
3443  // Shaders only generate MUBUF/MTBUF instructions via intrinsics or via
3444  // scratch memory access. In both cases, the legalization never involves
3445  // conversion to the addr64 form.
3446  if (isMIMG(MI) ||
3448  (isMUBUF(MI) || isMTBUF(MI)))) {
3449  MachineOperand *SRsrc = getNamedOperand(MI, AMDGPU::OpName::srsrc);
3450  if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg()))) {
3451  unsigned SGPR = readlaneVGPRToSGPR(SRsrc->getReg(), MI, MRI);
3452  SRsrc->setReg(SGPR);
3453  }
3454 
3455  MachineOperand *SSamp = getNamedOperand(MI, AMDGPU::OpName::ssamp);
3456  if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg()))) {
3457  unsigned SGPR = readlaneVGPRToSGPR(SSamp->getReg(), MI, MRI);
3458  SSamp->setReg(SGPR);
3459  }
3460  return;
3461  }
3462 
3463  // Legalize MUBUF* instructions by converting to addr64 form.
3464  // FIXME: If we start using the non-addr64 instructions for compute, we
3465  // may need to legalize them as above. This especially applies to the
3466  // buffer_load_format_* variants and variants with idxen (or bothen).
3467  int SRsrcIdx =
3468  AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
3469  if (SRsrcIdx != -1) {
3470  // We have an MUBUF instruction
3471  MachineOperand *SRsrc = &MI.getOperand(SRsrcIdx);
3472  unsigned SRsrcRC = get(MI.getOpcode()).OpInfo[SRsrcIdx].RegClass;
3473  if (RI.getCommonSubClass(MRI.getRegClass(SRsrc->getReg()),
3474  RI.getRegClass(SRsrcRC))) {
3475  // The operands are legal.
3476  // FIXME: We may need to legalize operands besides srsrc.
3477  return;
3478  }
3479 
3480  MachineBasicBlock &MBB = *MI.getParent();
3481 
3482  // Extract the ptr from the resource descriptor.
3483  unsigned SRsrcPtr = buildExtractSubReg(MI, MRI, *SRsrc,
3484  &AMDGPU::VReg_128RegClass, AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
3485 
3486  // Create an empty resource descriptor
3487  unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
3488  unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
3489  unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
3490  unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
3491  uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();
3492 
3493  // Zero64 = 0
3494  BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B64), Zero64)
3495  .addImm(0);
3496 
3497  // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
3498  BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B32), SRsrcFormatLo)
3499  .addImm(RsrcDataFormat & 0xFFFFFFFF);
3500 
3501  // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
3502  BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B32), SRsrcFormatHi)
3503  .addImm(RsrcDataFormat >> 32);
3504 
3505  // NewSRsrc = {Zero64, SRsrcFormat}
3506  BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewSRsrc)
3507  .addReg(Zero64)
3508  .addImm(AMDGPU::sub0_sub1)
3509  .addReg(SRsrcFormatLo)
3510  .addImm(AMDGPU::sub2)
3511  .addReg(SRsrcFormatHi)
3512  .addImm(AMDGPU::sub3);
3513 
3514  MachineOperand *VAddr = getNamedOperand(MI, AMDGPU::OpName::vaddr);
3515  unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
3516  if (VAddr) {
3517  // This is already an ADDR64 instruction so we need to add the pointer
3518  // extracted from the resource descriptor to the current value of VAddr.
3519  unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3520  unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3521 
3522  // NewVaddrLo = SRsrcPtr:sub0 + VAddr:sub0
3523  DebugLoc DL = MI.getDebugLoc();
3524  BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo)
3525  .addReg(SRsrcPtr, 0, AMDGPU::sub0)
3526  .addReg(VAddr->getReg(), 0, AMDGPU::sub0);
3527 
3528  // NewVaddrHi = SRsrcPtr:sub1 + VAddr:sub1
3529  BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e32), NewVAddrHi)
3530  .addReg(SRsrcPtr, 0, AMDGPU::sub1)
3531  .addReg(VAddr->getReg(), 0, AMDGPU::sub1);
3532 
3533  // NewVaddr = {NewVaddrHi, NewVaddrLo}
3534  BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
3535  .addReg(NewVAddrLo)
3536  .addImm(AMDGPU::sub0)
3537  .addReg(NewVAddrHi)
3538  .addImm(AMDGPU::sub1);
3539  } else {
3540  // This instruction is the _OFFSET variant, so we need to convert it to
3541  // ADDR64.
3542  assert(MBB.getParent()->getSubtarget<SISubtarget>().getGeneration()
3544  "FIXME: Need to emit flat atomics here");
3545 
3546  MachineOperand *VData = getNamedOperand(MI, AMDGPU::OpName::vdata);
3547  MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset);
3548  MachineOperand *SOffset = getNamedOperand(MI, AMDGPU::OpName::soffset);
3549  unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI.getOpcode());
3550 
3551  // Atomics with return have an additional tied operand and are
3552  // missing some of the special bits.
3553  MachineOperand *VDataIn = getNamedOperand(MI, AMDGPU::OpName::vdata_in);
3554  MachineInstr *Addr64;
3555 
3556  if (!VDataIn) {
3557  // Regular buffer load / store.
3558  MachineInstrBuilder MIB =
3559  BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode))
3560  .add(*VData)
3561  .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
3562  // This will be replaced later
3563  // with the new value of vaddr.
3564  .add(*SRsrc)
3565  .add(*SOffset)
3566  .add(*Offset);
3567 
3568  // Atomics do not have this operand.
3569  if (const MachineOperand *GLC =
3570  getNamedOperand(MI, AMDGPU::OpName::glc)) {
3571  MIB.addImm(GLC->getImm());
3572  }
3573 
3574  MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc));
3575 
3576  if (const MachineOperand *TFE =
3577  getNamedOperand(MI, AMDGPU::OpName::tfe)) {
3578  MIB.addImm(TFE->getImm());
3579  }
3580 
3582  Addr64 = MIB;
3583  } else {
3584  // Atomics with return.
3585  Addr64 = BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode))
3586  .add(*VData)
3587  .add(*VDataIn)
3588  .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
3589  // This will be replaced later
3590  // with the new value of vaddr.
3591  .add(*SRsrc)
3592  .add(*SOffset)
3593  .add(*Offset)
3594  .addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc))
3596  }
3597 
3598  MI.removeFromParent();
3599 
3600  // NewVaddr = {NewVaddrHi, NewVaddrLo}
3601  BuildMI(MBB, Addr64, Addr64->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
3602  NewVAddr)
3603  .addReg(SRsrcPtr, 0, AMDGPU::sub0)
3604  .addImm(AMDGPU::sub0)
3605  .addReg(SRsrcPtr, 0, AMDGPU::sub1)
3606  .addImm(AMDGPU::sub1);
3607 
3608  VAddr = getNamedOperand(*Addr64, AMDGPU::OpName::vaddr);
3609  SRsrc = getNamedOperand(*Addr64, AMDGPU::OpName::srsrc);
3610  }
3611 
3612  // Update the instruction to use NewVaddr
3613  VAddr->setReg(NewVAddr);
3614  // Update the instruction to use NewSRsrc
3615  SRsrc->setReg(NewSRsrc);
3616  }
3617 }
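// Net effect of the MUBUF legalization above (conceptual sketch): the pointer
// half of the original rsrc is extracted and added into vaddr, and the rsrc is
// replaced by a descriptor whose base is zero, so the instruction no longer
// keeps the original (illegal) register in its scalar resource operand.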
3618 
3619 void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
3620  SetVectorType Worklist;
3621  Worklist.insert(&TopInst);
3622 
3623  while (!Worklist.empty()) {
3624  MachineInstr &Inst = *Worklist.pop_back_val();
3625  MachineBasicBlock *MBB = Inst.getParent();
3626  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
3627 
3628  unsigned Opcode = Inst.getOpcode();
3629  unsigned NewOpcode = getVALUOp(Inst);
3630 
3631  // Handle some special cases
3632  switch (Opcode) {
3633  default:
3634  break;
3635  case AMDGPU::S_ADD_U64_PSEUDO:
3636  case AMDGPU::S_SUB_U64_PSEUDO:
3637  splitScalar64BitAddSub(Worklist, Inst);
3638  Inst.eraseFromParent();
3639  continue;
3640  case AMDGPU::S_ADD_I32:
3641  case AMDGPU::S_SUB_I32:
3642  // FIXME: The u32 versions currently selected use the carry.
3643  if (moveScalarAddSub(Worklist, Inst))
3644  continue;
3645 
3646  // Default handling
3647  break;
3648  case AMDGPU::S_AND_B64:
3649  splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_AND_B32_e64);
3650  Inst.eraseFromParent();
3651  continue;
3652 
3653  case AMDGPU::S_OR_B64:
3654  splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_OR_B32_e64);
3655  Inst.eraseFromParent();
3656  continue;
3657 
3658  case AMDGPU::S_XOR_B64:
3659  splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_XOR_B32_e64);
3660  Inst.eraseFromParent();
3661  continue;
3662 
3663  case AMDGPU::S_NOT_B64:
3664  splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::V_NOT_B32_e32);
3665  Inst.eraseFromParent();
3666  continue;
3667 
3668  case AMDGPU::S_BCNT1_I32_B64:
3669  splitScalar64BitBCNT(Worklist, Inst);
3670  Inst.eraseFromParent();
3671  continue;
3672 
3673  case AMDGPU::S_BFE_I64:
3674  splitScalar64BitBFE(Worklist, Inst);
3675  Inst.eraseFromParent();
3676  continue;
3677 
3678  case AMDGPU::S_LSHL_B32:
3679  if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
3680  NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
3681  swapOperands(Inst);
3682  }
3683  break;
3684  case AMDGPU::S_ASHR_I32:
3685  if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
3686  NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
3687  swapOperands(Inst);
3688  }
3689  break;
3690  case AMDGPU::S_LSHR_B32:
3691  if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
3692  NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
3693  swapOperands(Inst);
3694  }
3695  break;
3696  case AMDGPU::S_LSHL_B64:
3697  if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
3698  NewOpcode = AMDGPU::V_LSHLREV_B64;
3699  swapOperands(Inst);
3700  }
3701  break;
3702  case AMDGPU::S_ASHR_I64:
3703  if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
3704  NewOpcode = AMDGPU::V_ASHRREV_I64;
3705  swapOperands(Inst);
3706  }
3707  break;
3708  case AMDGPU::S_LSHR_B64:
3709  if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
3710  NewOpcode = AMDGPU::V_LSHRREV_B64;
3711  swapOperands(Inst);
3712  }
3713  break;
3714 
3715  case AMDGPU::S_ABS_I32:
3716  lowerScalarAbs(Worklist, Inst);
3717  Inst.eraseFromParent();
3718  continue;
3719 
3720  case AMDGPU::S_CBRANCH_SCC0:
3721  case AMDGPU::S_CBRANCH_SCC1:
3722  // Clear unused bits of vcc
3723  BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(AMDGPU::S_AND_B64),
3724  AMDGPU::VCC)
3725  .addReg(AMDGPU::EXEC)
3726  .addReg(AMDGPU::VCC);
3727  break;
3728 
3729  case AMDGPU::S_BFE_U64:
3730  case AMDGPU::S_BFM_B64:
3731  llvm_unreachable("Moving this op to VALU not implemented");
3732 
3733  case AMDGPU::S_PACK_LL_B32_B16:
3734  case AMDGPU::S_PACK_LH_B32_B16:
3735  case AMDGPU::S_PACK_HH_B32_B16:
3736  movePackToVALU(Worklist, MRI, Inst);
3737  Inst.eraseFromParent();
3738  continue;
3739 
3740  case AMDGPU::S_XNOR_B32:
3741  lowerScalarXnor(Worklist, Inst);
3742  Inst.eraseFromParent();
3743  continue;
3744 
3745  case AMDGPU::S_XNOR_B64:
3746  splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32);
3747  Inst.eraseFromParent();
3748  continue;
3749 
3750  case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR: {
3751  unsigned VDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3752  const MachineOperand *VAddr = getNamedOperand(Inst, AMDGPU::OpName::soff);
3753  auto Add = MRI.getUniqueVRegDef(VAddr->getReg());
3754  unsigned Offset = 0;
3755 
3756  // FIXME: This isn't safe because the addressing mode doesn't work
3757  // correctly if vaddr is negative.
3758  //
3759  // FIXME: Should probably be done somewhere else, maybe SIFoldOperands.
3760  //
3761  // See if we can extract an immediate offset by recognizing one of these:
3762  // V_ADD_I32_e32 dst, imm, src1
3763  // V_ADD_I32_e32 dst, (S_MOV_B32 imm), src1
3764  // V_ADD will be removed by "Remove dead machine instructions".
3765  if (Add &&
3766  (Add->getOpcode() == AMDGPU::V_ADD_I32_e32 ||
3767  Add->getOpcode() == AMDGPU::V_ADD_U32_e64)) {
3768  static const unsigned SrcNames[2] = {
3769  AMDGPU::OpName::src0,
3770  AMDGPU::OpName::src1,
3771  };
3772 
3773  // Find a literal offset in one of source operands.
3774  for (int i = 0; i < 2; i++) {
3775  const MachineOperand *Src =
3776  getNamedOperand(*Add, SrcNames[i]);
3777 
3778  if (Src->isReg()) {
3779  auto Mov = MRI.getUniqueVRegDef(Src->getReg());
3780  if (Mov && Mov->getOpcode() == AMDGPU::S_MOV_B32)
3781  Src = &Mov->getOperand(1);
3782  }
3783 
3784  if (Src) {
3785  if (Src->isImm())
3786  Offset = Src->getImm();
3787  else if (Src->isCImm())
3788  Offset = Src->getCImm()->getZExtValue();
3789  }
3790 
3791  if (Offset && isLegalMUBUFImmOffset(Offset)) {
3792  VAddr = getNamedOperand(*Add, SrcNames[!i]);
3793  break;
3794  }
3795 
3796  Offset = 0;
3797  }
3798  }
3799 
3800  MachineInstr *NewInstr =
3801  BuildMI(*MBB, Inst, Inst.getDebugLoc(),
3802  get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), VDst)
3803  .add(*VAddr) // vaddr
3804  .add(*getNamedOperand(Inst, AMDGPU::OpName::sbase)) // srsrc
3805  .addImm(0) // soffset
3806  .addImm(Offset) // offset
3807  .addImm(getNamedOperand(Inst, AMDGPU::OpName::glc)->getImm())
3808  .addImm(0) // slc
3809  .addImm(0) // tfe
3811  .getInstr();
3812 
3813  MRI.replaceRegWith(getNamedOperand(Inst, AMDGPU::OpName::sdst)->getReg(),
3814  VDst);
3815  addUsersToMoveToVALUWorklist(VDst, MRI, Worklist);
3816  Inst.eraseFromParent();
3817 
3818  // Legalize all operands other than the offset. Notably, convert the srsrc
3819  // into SGPRs using v_readfirstlane if needed.
3820  legalizeOperands(*NewInstr);
3821  continue;
3822  }
3823  }
3824 
3825  if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
3826  // We cannot move this instruction to the VALU, so we should try to
3827  // legalize its operands instead.
3828  legalizeOperands(Inst);
3829  continue;
3830  }
3831 
3832  // Use the new VALU Opcode.
3833  const MCInstrDesc &NewDesc = get(NewOpcode);
3834  Inst.setDesc(NewDesc);
3835 
3836  // Remove any references to SCC. Vector instructions can't read from it,
3837  // and we're about to add the implicit use / defs of VCC; we don't want
3838  // both.
3839  for (unsigned i = Inst.getNumOperands() - 1; i > 0; --i) {
3840  MachineOperand &Op = Inst.getOperand(i);
3841  if (Op.isReg() && Op.getReg() == AMDGPU::SCC) {
3842  Inst.RemoveOperand(i);
3843  addSCCDefUsersToVALUWorklist(Inst, Worklist);
3844  }
3845  }
3846 
3847  if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
3848  // We are converting these to a BFE, so we need to add the missing
3849  // operands for the size and offset.
3850  unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
3851  Inst.addOperand(MachineOperand::CreateImm(0));
3852  Inst.addOperand(MachineOperand::CreateImm(Size));
3853 
3854  } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
3855  // The VALU version adds the second operand to the result, so insert an
3856  // extra 0 operand.
3857  Inst.addOperand(MachineOperand::CreateImm(0));
3858  }
3859 
3860  Inst.addImplicitDefUseOperands(*Inst.getParent()->getParent());
3861 
3862  if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
3863  const MachineOperand &OffsetWidthOp = Inst.getOperand(2);
3864  // If we need to move this to VGPRs, we need to unpack the second operand
3865  // back into the 2 separate ones for bit offset and width.
3866  assert(OffsetWidthOp.isImm() &&
3867  "Scalar BFE is only implemented for constant width and offset");
3868  uint32_t Imm = OffsetWidthOp.getImm();
3869 
3870  uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
3871  uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
3872  Inst.RemoveOperand(2); // Remove old immediate.
3873  Inst.addOperand(MachineOperand::CreateImm(Offset));
3874  Inst.addOperand(MachineOperand::CreateImm(BitWidth));
3875  }
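// Worked example of the S_BFE immediate encoding above (illustrative values):
// Imm = 0x00100008 gives Offset = 0x08 and BitWidth = 0x10, i.e. extract 16
// bits starting at bit 8, matching the separate offset and width operands of
// the VALU BFE.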
3876 
3877  bool HasDst = Inst.getOperand(0).isReg() && Inst.getOperand(0).isDef();
3878  unsigned NewDstReg = AMDGPU::NoRegister;
3879  if (HasDst) {
3880  unsigned DstReg = Inst.getOperand(0).getReg();
3881  if (TargetRegisterInfo::isPhysicalRegister(DstReg))
3882  continue;
3883 
3884  // Update the destination register class.
3885  const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst);
3886  if (!NewDstRC)
3887  continue;
3888 
3889  if (Inst.isCopy() &&
3891  NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) {
3892  // Instead of creating a copy where src and dst are the same register
3893  // class, we just replace all uses of dst with src. These kinds of
3894  // copies interfere with the heuristics MachineSink uses to decide
3895  // whether or not to split a critical edge, since the pass assumes
3896  // that copies will end up as machine instructions and not be
3897  // eliminated.
3898  addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
3899  MRI.replaceRegWith(DstReg, Inst.getOperand(1).getReg());
3900  MRI.clearKillFlags(Inst.getOperand(1).getReg());
3901  Inst.getOperand(0).setReg(DstReg);
3902  continue;
3903  }
3904 
3905  NewDstReg = MRI.createVirtualRegister(NewDstRC);
3906  MRI.replaceRegWith(DstReg, NewDstReg);
3907  }
3908 
3909  // Legalize the operands
3910  legalizeOperands(Inst);
3911 
3912  if (HasDst)
3913  addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
3914  }
3915 }
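// The worklist above is seeded with the offending instruction and grows with
// every user whose inputs become VGPRs, so the SGPR-to-VGPR conversion
// propagates transitively through the use chains.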
3916 
3917 // Add/sub require special handling to deal with carry outs.
3918 bool SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist,
3919  MachineInstr &Inst) const {
3920  if (ST.hasAddNoCarry()) {
3921  // Assume there is no user of scc since we don't select this in that case.
3922  // Since scc isn't used, it doesn't really matter if the i32 or u32 variant
3923  // is used.
3924 
3925  MachineBasicBlock &MBB = *Inst.getParent();
3926  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3927 
3928  unsigned OldDstReg = Inst.getOperand(0).getReg();
3929  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3930 
3931  unsigned Opc = Inst.getOpcode();
3932  assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32);
3933 
3934  unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ?
3935  AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
3936 
3937  assert(Inst.getOperand(3).getReg() == AMDGPU::SCC);
3938  Inst.RemoveOperand(3);
3939 
3940  Inst.setDesc(get(NewOpc));
3941  Inst.addImplicitDefUseOperands(*MBB.getParent());
3942  MRI.replaceRegWith(OldDstReg, ResultReg);
3943  legalizeOperands(Inst);
3944 
3945  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
3946  return true;
3947  }
3948 
3949  return false;
3950 }
3951 
3952 void SIInstrInfo::lowerScalarAbs(SetVectorType &Worklist,
3953  MachineInstr &Inst) const {
3954  MachineBasicBlock &MBB = *Inst.getParent();
3955  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3956  MachineBasicBlock::iterator MII = Inst;
3957  DebugLoc DL = Inst.getDebugLoc();
3958 
3959  MachineOperand &Dest = Inst.getOperand(0);
3960  MachineOperand &Src = Inst.getOperand(1);
3961  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3962  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3963 
3964  unsigned SubOp = ST.hasAddNoCarry() ?
3965  AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_I32_e32;
3966 
3967  BuildMI(MBB, MII, DL, get(SubOp), TmpReg)
3968  .addImm(0)
3969  .addReg(Src.getReg());
3970 
3971  BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg)
3972  .addReg(Src.getReg())
3973  .addReg(TmpReg);
3974 
3975  MRI.replaceRegWith(Dest.getReg(), ResultReg);
3976  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
3977 }
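// In effect s_abs_i32 is expanded as abs(x) = max(x, 0 - x), since the VALU
// has no single integer abs instruction.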
3978 
3979 void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist,
3980  MachineInstr &Inst) const {
3981  MachineBasicBlock &MBB = *Inst.getParent();
3982  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3983  MachineBasicBlock::iterator MII = Inst;
3984  const DebugLoc &DL = Inst.getDebugLoc();
3985 
3986  MachineOperand &Dest = Inst.getOperand(0);
3987  MachineOperand &Src0 = Inst.getOperand(1);
3988  MachineOperand &Src1 = Inst.getOperand(2);
3989 
3990  legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src0, MRI, DL);
3991  legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src1, MRI, DL);
3992 
3993  unsigned Xor = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3994  BuildMI(MBB, MII, DL, get(AMDGPU::V_XOR_B32_e64), Xor)
3995  .add(Src0)
3996  .add(Src1);
3997 
3998  unsigned Not = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3999  BuildMI(MBB, MII, DL, get(AMDGPU::V_NOT_B32_e64), Not)
4000  .addReg(Xor);
4001 
4002  MRI.replaceRegWith(Dest.getReg(), Not);
4003  addUsersToMoveToVALUWorklist(Not, MRI, Worklist);
4004 }
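// i.e. xnor(a, b) is expanded as not(xor(a, b)) on the VALU.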
4005 
4006 void SIInstrInfo::splitScalar64BitUnaryOp(
4007  SetVectorType &Worklist, MachineInstr &Inst,
4008  unsigned Opcode) const {
4009  MachineBasicBlock &MBB = *Inst.getParent();
4010  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4011 
4012  MachineOperand &Dest = Inst.getOperand(0);
4013  MachineOperand &Src0 = Inst.getOperand(1);
4014  DebugLoc DL = Inst.getDebugLoc();
4015 
4016  MachineBasicBlock::iterator MII = Inst;
4017 
4018  const MCInstrDesc &InstDesc = get(Opcode);
4019  const TargetRegisterClass *Src0RC = Src0.isReg() ?
4020  MRI.getRegClass(Src0.getReg()) :
4021  &AMDGPU::SGPR_32RegClass;
4022 
4023  const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
4024 
4025  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
4026  AMDGPU::sub0, Src0SubRC);
4027 
4028  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
4029  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
4030  const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
4031 
4032  unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
4033  BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
4034 
4035  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
4036  AMDGPU::sub1, Src0SubRC);
4037 
4038  unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
4039  BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
4040 
4041  unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
4042  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
4043  .addReg(DestSub0)
4044  .addImm(AMDGPU::sub0)
4045  .addReg(DestSub1)
4046  .addImm(AMDGPU::sub1);
4047 
4048  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
4049 
4050  // We don't need to legalizeOperands here because for a single operand, src0
4051  // will support any kind of input.
4052 
4053  // Move all users of this moved value.
4054  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
4055 }
4056 
4057 void SIInstrInfo::splitScalar64BitAddSub(
4058  SetVectorType &Worklist, MachineInstr &Inst) const {
4059  bool IsAdd = (Inst.getOpcode() == AMDGPU::S_ADD_U64_PSEUDO);
4060 
4061  MachineBasicBlock &MBB = *Inst.getParent();
4062  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4063 
4064  unsigned FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
4065  unsigned DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4066  unsigned DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4067 
4068  unsigned CarryReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
4069  unsigned DeadCarryReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
4070 
4071  MachineOperand &Dest = Inst.getOperand(0);
4072  MachineOperand &Src0 = Inst.getOperand(1);
4073  MachineOperand &Src1 = Inst.getOperand(2);
4074  const DebugLoc &DL = Inst.getDebugLoc();
4075  MachineBasicBlock::iterator MII = Inst;
4076 
4077  const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
4078  const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
4079  const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
4080  const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);
4081 
4082  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
4083  AMDGPU::sub0, Src0SubRC);
4084  MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
4085  AMDGPU::sub0, Src1SubRC);
4086 
4087 
4088  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
4089  AMDGPU::sub1, Src0SubRC);
4090  MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
4091  AMDGPU::sub1, Src1SubRC);
4092 
4093  unsigned LoOpc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
4094  MachineInstr *LoHalf =
4095  BuildMI(MBB, MII, DL, get(LoOpc), DestSub0)
4096  .addReg(CarryReg, RegState::Define)
4097  .add(SrcReg0Sub0)
4098  .add(SrcReg1Sub0);
4099 
4100  unsigned HiOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
4101  MachineInstr *HiHalf =
4102  BuildMI(MBB, MII, DL, get(HiOpc), DestSub1)
4103  .addReg(DeadCarryReg, RegState::Define | RegState::Dead)
4104  .add(SrcReg0Sub1)
4105  .add(SrcReg1Sub1)
4106  .addReg(CarryReg, RegState::Kill);
4107 
4108  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
4109  .addReg(DestSub0)
4110  .addImm(AMDGPU::sub0)
4111  .addReg(DestSub1)
4112  .addImm(AMDGPU::sub1);
4113 
4114  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
4115 
4116  // Try to legalize the operands in case we need to swap the order to keep it
4117  // valid.
4118  legalizeOperands(*LoHalf);
4119  legalizeOperands(*HiHalf);
4120 
4121  // Move all users of this moved value.
4122  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
4123 }
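// Conceptually (sketch of the expansion above):
//   {CarryReg, DestSub0} = Src0.sub0 + Src1.sub0              // V_ADD_I32_e64
//   DestSub1             = Src0.sub1 + Src1.sub1 + CarryReg   // V_ADDC_U32_e64
//   FullDestReg          = {DestSub1, DestSub0}
// with the analogous borrow chain (V_SUB/V_SUBB) for the subtract pseudo.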
4124 
4125 void SIInstrInfo::splitScalar64BitBinaryOp(
4126  SetVectorType &Worklist, MachineInstr &Inst,
4127  unsigned Opcode) const {
4128  MachineBasicBlock &MBB = *Inst.getParent();
4129  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4130 
4131  MachineOperand &Dest = Inst.getOperand(0);
4132  MachineOperand &Src0 = Inst.getOperand(1);
4133  MachineOperand &Src1 = Inst.getOperand(2);
4134  DebugLoc DL = Inst.getDebugLoc();
4135 
4136  MachineBasicBlock::iterator MII = Inst;
4137 
4138  const MCInstrDesc &InstDesc = get(Opcode);
4139  const TargetRegisterClass *Src0RC = Src0.isReg() ?
4140  MRI.getRegClass(Src0.getReg()) :
4141  &AMDGPU::SGPR_32RegClass;
4142 
4143  const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
4144  const TargetRegisterClass *Src1RC = Src1.isReg() ?
4145  MRI.getRegClass(Src1.getReg()) :
4146  &AMDGPU::SGPR_32RegClass;
4147 
4148  const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);
4149 
4150  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
4151  AMDGPU::sub0, Src0SubRC);
4152  MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
4153  AMDGPU::sub0, Src1SubRC);
4154 
4155  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
4156  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
4157  const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
4158 
4159  unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
4160  MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0)
4161  .add(SrcReg0Sub0)
4162  .add(SrcReg1Sub0);
4163 
4164  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
4165  AMDGPU::sub1, Src0SubRC);
4166  MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
4167  AMDGPU::sub1, Src1SubRC);
4168 
4169  unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
4170  MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1)
4171  .add(SrcReg0Sub1)
4172  .add(SrcReg1Sub1);
4173 
4174  unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
4175  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
4176  .addReg(DestSub0)
4177  .addImm(AMDGPU::sub0)
4178  .addReg(DestSub1)
4179  .addImm(AMDGPU::sub1);
4180 
4181  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
4182 
4183  // Try to legalize the operands in case we need to swap the order to keep it
4184  // valid.
4185  legalizeOperands(LoHalf);
4186  legalizeOperands(HiHalf);
4187 
4188  // Move all users of this moved value.
4189  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
4190 }
4191 
4192 void SIInstrInfo::splitScalar64BitBCNT(
4193  SetVectorType &Worklist, MachineInstr &Inst) const {
4194  MachineBasicBlock &MBB = *Inst.getParent();
4195  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4196 
4197  MachineBasicBlock::iterator MII = Inst;
4198  DebugLoc DL = Inst.getDebugLoc();
4199 
4200  MachineOperand &Dest = Inst.getOperand(0);
4201  MachineOperand &Src = Inst.getOperand(1);
4202 
4203  const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
4204  const TargetRegisterClass *SrcRC = Src.isReg() ?
4205  MRI.getRegClass(Src.getReg()) :
4206  &AMDGPU::SGPR_32RegClass;
4207 
4208  unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4209  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4210 
4211  const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0);
4212 
4213  MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
4214  AMDGPU::sub0, SrcSubRC);
4215  MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
4216  AMDGPU::sub1, SrcSubRC);
4217 
4218  BuildMI(MBB, MII, DL, InstDesc, MidReg).add(SrcRegSub0).addImm(0);
4219 
4220  BuildMI(MBB, MII, DL, InstDesc, ResultReg).add(SrcRegSub1).addReg(MidReg);
4221 
4222  MRI.replaceRegWith(Dest.getReg(), ResultReg);
4223 
4224  // We don't need to legalize operands here. src0 for either instruction can be
4225  // an SGPR, and the second input is unused or determined here.
4226  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
4227 }
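// i.e. the 64-bit popcount is computed as
//   MidReg    = bcnt_u32(Src.sub0, 0)
//   ResultReg = bcnt_u32(Src.sub1, MidReg)
// using V_BCNT_U32_B32's accumulator operand to sum the two halves.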
4228 
4229 void SIInstrInfo::splitScalar64BitBFE(SetVectorType &Worklist,
4230  MachineInstr &Inst) const {
4231  MachineBasicBlock &MBB = *Inst.getParent();
4232  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4233  MachineBasicBlock::iterator MII = Inst;
4234  DebugLoc DL = Inst.getDebugLoc();
4235 
4236  MachineOperand &Dest = Inst.getOperand(0);
4237  uint32_t Imm = Inst.getOperand(2).getImm();
4238  uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
4239  uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
4240 
4241  (void) Offset;
4242 
4243  // Only sext_inreg cases handled.
4244  assert(Inst.getOpcode() == AMDGPU::S_BFE_I64 && BitWidth <= 32 &&
4245  Offset == 0 && "Not implemented");
4246 
4247  if (BitWidth < 32) {
4248  unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4249  unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4250  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
4251 
4252  BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo)
4253  .addReg(Inst.getOperand(1).getReg(), 0, AMDGPU::sub0)
4254  .addImm(0)
4255  .addImm(BitWidth);
4256 
4257  BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi)
4258  .addImm(31)
4259  .addReg(MidRegLo);
4260 
4261  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
4262  .addReg(MidRegLo)
4263  .addImm(AMDGPU::sub0)
4264  .addReg(MidRegHi)
4265  .addImm(AMDGPU::sub1);
4266 
4267  MRI.replaceRegWith(Dest.getReg(), ResultReg);
4268  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
4269  return;
4270  }
4271 
4272  MachineOperand &Src = Inst.getOperand(1);
4273  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4274  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
4275 
4276  BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg)
4277  .addImm(31)
4278  .addReg(Src.getReg(), 0, AMDGPU::sub0);
4279 
4280  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
4281  .addReg(Src.getReg(), 0, AMDGPU::sub0)
4282  .addImm(AMDGPU::sub0)
4283  .addReg(TmpReg)
4284  .addImm(AMDGPU::sub1);
4285 
4286  MRI.replaceRegWith(Dest.getReg(), ResultReg);
4287  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
4288 }
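// Example of the narrow case above (illustrative): a 64-bit sext_inreg from 16
// bits becomes V_BFE_I32 of the low half followed by an arithmetic shift right
// by 31 to produce the sign-extended high half.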
4289 
4290 void SIInstrInfo::addUsersToMoveToVALUWorklist(
4291  unsigned DstReg,
4292  MachineRegisterInfo &MRI,
4293  SetVectorType &Worklist) const {
4294  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg),
4295  E = MRI.use_end(); I != E;) {
4296  MachineInstr &UseMI = *I->getParent();
4297  if (!canReadVGPR(UseMI, I.getOperandNo())) {
4298  Worklist.insert(&UseMI);
4299 
4300  do {
4301  ++I;
4302  } while (I != E && I->getParent() == &UseMI);
4303  } else {
4304  ++I;
4305  }
4306  }
4307 }
4308 
4309 void SIInstrInfo::movePackToVALU(SetVectorType &Worklist,
4310  MachineRegisterInfo &MRI,
4311  MachineInstr &Inst) const {
4312  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4313  MachineBasicBlock *MBB = Inst.getParent();
4314  MachineOperand &Src0 = Inst.getOperand(1);
4315  MachineOperand &Src1 = Inst.getOperand(2);
4316  const DebugLoc &DL = Inst.getDebugLoc();
4317 
4318  switch (Inst.getOpcode()) {
4319  case AMDGPU::S_PACK_LL_B32_B16: {
4320  unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4321  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4322 
4323  // FIXME: Can do a lot better if we know the high bits of src0 or src1 are
4324  // 0.
4325  BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
4326  .addImm(0xffff);
4327 
4328  BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_B32_e64), TmpReg)
4329  .addReg(ImmReg, RegState::Kill)
4330  .add(Src0);
4331 
4332  BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHL_OR_B32), ResultReg)
4333  .add(Src1)
4334  .addImm(16)
4335  .addReg(TmpReg, RegState::Kill);
4336  break;
4337  }
4338  case AMDGPU::S_PACK_LH_B32_B16: {
4339  unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4340  BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
4341  .addImm(0xffff);
4342  BuildMI(*MBB, Inst, DL, get(AMDGPU::V_BFI_B32), ResultReg)
4343  .addReg(ImmReg, RegState::Kill)
4344  .add(Src0)
4345  .add(Src1);
4346  break;
4347  }
4348  case AMDGPU::S_PACK_HH_B32_B16: {
4349  unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4350  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4351  BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHRREV_B32_e64), TmpReg)
4352  .addImm(16)
4353  .add(Src0);
4354  BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
4355  .addImm(0xffff0000);
4356  BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_OR_B32), ResultReg)
4357  .add(Src1)
4358  .addReg(ImmReg, RegState::Kill)
4359  .addReg(TmpReg, RegState::Kill);
4360  break;
4361  }
4362  default:
4363  llvm_unreachable("unhandled s_pack_* instruction");
4364  }
4365 
4366  MachineOperand &Dest = Inst.getOperand(0);
4367  MRI.replaceRegWith(Dest.getReg(), ResultReg);
4368  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
4369 }
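// Bit-level effect of the expansions above (sketch):
//   s_pack_ll: Result = (Src1 << 16) | (Src0 & 0xffff)
//   s_pack_lh: Result = (Src1 & 0xffff0000) | (Src0 & 0xffff)
//   s_pack_hh: Result = (Src1 & 0xffff0000) | (Src0 >> 16)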
4370 
4371 void SIInstrInfo::addSCCDefUsersToVALUWorklist(
4372  MachineInstr &SCCDefInst, SetVectorType &Worklist) const {
4373  // This assumes that all the users of SCC are in the same block
4374  // as the SCC def.
4375  for (MachineInstr &MI :
4376  make_range(MachineBasicBlock::iterator(SCCDefInst),
4377  SCCDefInst.getParent()->end())) {
4378  // Exit if we find another SCC def.
4379  if (MI.findRegisterDefOperandIdx(AMDGPU::SCC) != -1)
4380  return;
4381 
4382  if (MI.findRegisterUseOperandIdx(AMDGPU::SCC) != -1)
4383  Worklist.insert(&MI);
4384  }
4385 }
4386 
4387 const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
4388  const MachineInstr &Inst) const {
4389  const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0);
4390 
4391  switch (Inst.getOpcode()) {
4392  // For target instructions, getOpRegClass just returns the virtual register
4393  // class associated with the operand, so we need to find an equivalent VGPR
4394  // register class in order to move the instruction to the VALU.
4395  case AMDGPU::COPY:
4396  case AMDGPU::PHI:
4397  case AMDGPU::REG_SEQUENCE:
4398  case AMDGPU::INSERT_SUBREG:
4399  case AMDGPU::WQM:
4400  case AMDGPU::WWM:
4401  if (RI.hasVGPRs(NewDstRC))
4402  return nullptr;
4403 
4404  NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
4405  if (!NewDstRC)
4406  return nullptr;
4407  return NewDstRC;
4408  default:
4409  return NewDstRC;
4410  }
4411 }
4412 
4413 // Find the one SGPR operand we are allowed to use.
4414 unsigned SIInstrInfo::findUsedSGPR(const MachineInstr &MI,
4415  int OpIndices[3]) const {
4416  const MCInstrDesc &Desc = MI.getDesc();
4417 
4418  // Find the one SGPR operand we are allowed to use.
4419  //
4420  // First we need to consider the instruction's operand requirements before
4421  // legalizing. Some operands are required to be SGPRs, such as implicit uses
4422  // of VCC, but we are still bound by the constant bus requirement to only use
4423  // one.
4424  //
4425  // If the operand's class is an SGPR, we can never move it.
4426 
4427  unsigned SGPRReg = findImplicitSGPRRead(MI);
4428  if (SGPRReg != AMDGPU::NoRegister)
4429  return SGPRReg;
4430 
4431  unsigned UsedSGPRs[3] = { AMDGPU::NoRegister };
4432  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4433 
4434  for (unsigned i = 0; i < 3; ++i) {
4435  int Idx = OpIndices[i];
4436  if (Idx == -1)
4437  break;
4438 
4439  const MachineOperand &MO = MI.getOperand(Idx);
4440  if (!MO.isReg())
4441  continue;
4442 
4443  // Is this operand statically required to be an SGPR based on the operand
4444  // constraints?
4445  const TargetRegisterClass *OpRC = RI.getRegClass(Desc.OpInfo[Idx].RegClass);
4446  bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
4447  if (IsRequiredSGPR)
4448  return MO.getReg();
4449 
4450  // If this could be a VGPR or an SGPR, check the dynamic register class.
4451  unsigned Reg = MO.getReg();
4452  const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
4453  if (RI.isSGPRClass(RegRC))
4454  UsedSGPRs[i] = Reg;
4455  }
4456 
4457  // We don't have a required SGPR operand, so we have a bit more freedom in
4458  // selecting operands to move.
4459 
4460  // Try to select the most used SGPR. If an SGPR is equal to one of the
4461  // others, we choose that.
4462  //
4463  // e.g.
4464  // V_FMA_F32 v0, s0, s0, s0 -> No moves
4465  // V_FMA_F32 v0, s0, s1, s0 -> Move s1
4466 
4467  // TODO: If some of the operands are 64-bit SGPRs and some 32, we should
4468  // prefer those.
4469 
4470  if (UsedSGPRs[0] != AMDGPU::NoRegister) {
4471  if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
4472  SGPRReg = UsedSGPRs[0];
4473  }
4474 
4475  if (SGPRReg == AMDGPU::NoRegister && UsedSGPRs[1] != AMDGPU::NoRegister) {
4476  if (UsedSGPRs[1] == UsedSGPRs[2])
4477  SGPRReg = UsedSGPRs[1];
4478  }
4479 
4480  return SGPRReg;
4481 }
4482 
4483 MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
4484  unsigned OperandName) const {
4485  int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
4486  if (Idx == -1)
4487  return nullptr;
4488 
4489  return &MI.getOperand(Idx);
4490 }
4491 
4492 uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
4493  uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
4494  if (ST.isAmdHsaOS()) {
4495  // Set ATC = 1. GFX9 doesn't have this bit.
4497  RsrcDataFormat |= (1ULL << 56);
4498 
4499  // Set MTYPE = 2 (MTYPE_UC = uncached). GFX9 doesn't have this.
4500  // BTW, it disables TC L2 and therefore decreases performance.
4502  RsrcDataFormat |= (2ULL << 59);
4503  }
4504 
4505  return RsrcDataFormat;
4506 }
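// Numerically, (1ULL << 56) sets the ATC bit and (2ULL << 59) writes the value
// 2 into the MTYPE field that starts at bit 59; on non-HSA targets the plain
// RSRC_DATA_FORMAT constant is returned unchanged.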
4507 
4508 uint64_t SIInstrInfo::getScratchRsrcWords23() const {
4509  uint64_t Rsrc23 = getDefaultRsrcDataFormat() |
4510  AMDGPU::RSRC_TID_ENABLE |
4511  0xffffffff; // Size;
4512 
4513  // GFX9 doesn't have ELEMENT_SIZE.
4515  uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1;
4516  Rsrc23 |= EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT;
4517  }
4518 
4519  // IndexStride = 64.
4520  Rsrc23 |= UINT64_C(3) << AMDGPU::RSRC_INDEX_STRIDE_SHIFT;
4521 
4522  // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
4523  // Clear them unless we want a huge stride.
4525  Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
4526 
4527  return Rsrc23;
4528 }
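// Sketch of the result: the low word (word 2 of the descriptor) is the buffer
// size, 0xffffffff, and the high word (word 3) carries the default data format
// plus an index stride encoding of 3, which the comment above notes means a
// stride of 64.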
4529 
4531  unsigned Opc = MI.getOpcode();
4532 
4533  return isSMRD(Opc);
4534 }
4535 
4537  unsigned Opc = MI.getOpcode();
4538 
4539  return isMUBUF(Opc) || isMTBUF(Opc) || isMIMG(Opc);
4540 }
4541 
4542 unsigned SIInstrInfo::isStackAccess(const MachineInstr &MI,
4543  int &FrameIndex) const {
4544  const MachineOperand *Addr = getNamedOperand(MI, AMDGPU::OpName::vaddr);
4545  if (!Addr || !Addr->isFI())
4546  return AMDGPU::NoRegister;
4547 
4548  assert(!MI.memoperands_empty() &&
4550 
4551  FrameIndex = Addr->getIndex();
4552  return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
4553 }
4554 
4555 unsigned SIInstrInfo::isSGPRStackAccess(const MachineInstr &MI,
4556  int &FrameIndex) const {
4557  const MachineOperand *Addr = getNamedOperand(MI, AMDGPU::OpName::addr);
4558  assert(Addr && Addr->isFI());
4559  FrameIndex = Addr->getIndex();
4560  return getNamedOperand(MI, AMDGPU::OpName::data)->getReg();
4561 }
4562 
4563 unsigned SIInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
4564  int &FrameIndex) const {
4565  if (!MI.mayLoad())
4566  return AMDGPU::NoRegister;
4567 
4568  if (isMUBUF(MI) || isVGPRSpill(MI))
4569  return isStackAccess(MI, FrameIndex);
4570 
4571  if (isSGPRSpill(MI))
4572  return isSGPRStackAccess(MI, FrameIndex);
4573 
4574  return AMDGPU::NoRegister;
4575 }
4576 
4577 unsigned SIInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
4578  int &FrameIndex) const {
4579  if (!MI.mayStore())
4580  return AMDGPU::NoRegister;
4581 
4582  if (isMUBUF(MI) || isVGPRSpill(MI))
4583  return isStackAccess(MI, FrameIndex);
4584 
4585  if (isSGPRSpill(MI))
4586  return isSGPRStackAccess(MI, FrameIndex);
4587 
4588  return AMDGPU::NoRegister;
4589 }
4590 
4591 unsigned SIInstrInfo::getInstBundleSize(const MachineInstr &MI) const {
4592  unsigned Size = 0;
4593  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
4594  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4595  while (++I != E && I->isInsideBundle()) {
4596  assert(!I->isBundle() && "No nested bundle!");
4597  Size += getInstSizeInBytes(*I);
4598  }
4599 
4600  return Size;
4601 }
4602 
4603 unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
4604  unsigned Opc = MI.getOpcode();
4605  const MCInstrDesc &Desc = getMCOpcodeFromPseudo(Opc);
4606  unsigned DescSize = Desc.getSize();
4607 
4608  // If we have a definitive size, we can use it. Otherwise we need to inspect
4609  // the operands to know the size.
4610  //
4611  // FIXME: Instructions that have a base 32-bit encoding report their size as
4612  // 4, even though they are really 8 bytes if they have a literal operand.
4613  if (DescSize != 0 && DescSize != 4)
4614  return DescSize;
4615 
4616  // 4-byte instructions may have a 32-bit literal encoded after them. Check
4617  // operands that could ever be literals.
4618  if (isVALU(MI) || isSALU(MI)) {
4619  if (isFixedSize(MI))
4620  return DescSize;
4621 
4622  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4623  if (Src0Idx == -1)
4624  return 4; // No operands.
4625 
4626  if (isLiteralConstantLike(MI.getOperand(Src0Idx), Desc.OpInfo[Src0Idx]))
4627  return 8;
4628 
4629  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4630  if (Src1Idx == -1)
4631  return 4;
4632 
4633  if (isLiteralConstantLike(MI.getOperand(Src1Idx), Desc.OpInfo[Src1Idx]))
4634  return 8;
4635 
4636  return 4;
4637  }
4638 
4639  if (DescSize == 4)
4640  return 4;
4641 
4642  switch (Opc) {
4643  case TargetOpcode::IMPLICIT_DEF:
4644  case TargetOpcode::KILL:
4645  case TargetOpcode::DBG_VALUE:
4647  return 0;
4648  case TargetOpcode::BUNDLE:
4649  return getInstBundleSize(MI);
4650  case TargetOpcode::INLINEASM: {
4651  const MachineFunction *MF = MI.getParent()->getParent();
4652  const char *AsmStr = MI.getOperand(0).getSymbolName();
4653  return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
4654  }
4655  default:
4656  llvm_unreachable("unable to find instruction size");
4657  }
4658 }
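// Example of the VALU/SALU sizing above (illustrative): s_mov_b32 s0, 1 uses
// an inline constant and is 4 bytes, while s_mov_b32 s0, 0x12345678 needs a
// trailing 32-bit literal and is reported as 8 bytes.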
4659 
4660 bool SIInstrInfo::mayAccessFlatAddressSpace(const MachineInstr &MI) const {
4661  if (!isFLAT(MI))
4662  return false;
4663 
4664  if (MI.memoperands_empty())
4665  return true;
4666 
4667  for (const MachineMemOperand *MMO : MI.memoperands()) {
4668  if (MMO->getAddrSpace() == AMDGPUASI.FLAT_ADDRESS)
4669  return true;
4670  }
4671  return false;
4672 }
4673 
4674 bool SIInstrInfo::isNonUniformBranchInstr(MachineInstr &Branch) const {
4675  return Branch.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO;
4676 }
4677 
4678 void SIInstrInfo::convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
4679  MachineBasicBlock *IfEnd) const {
4680  MachineBasicBlock::iterator TI = IfEntry->getFirstTerminator();
4681  assert(TI != IfEntry->end());
4682 
4683  MachineInstr *Branch = &(*TI);
4684  MachineFunction *MF = IfEntry->getParent();
4685  MachineRegisterInfo &MRI = IfEntry->getParent()->getRegInfo();
4686 
4687  if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
4688  unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
4689  MachineInstr *SIIF =
4690  BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_IF), DstReg)
4691  .add(Branch->getOperand(0))
4692  .add(Branch->getOperand(1));
4693  MachineInstr *SIEND =
4694  BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_END_CF))
4695  .addReg(DstReg);
4696 
4697  IfEntry->erase(TI);
4698  IfEntry->insert(IfEntry->end(), SIIF);
4699  IfEnd->insert(IfEnd->getFirstNonPHI(), SIEND);
4700  }
4701 }
4702 
4703 void SIInstrInfo::convertNonUniformLoopRegion(
4704  MachineBasicBlock *LoopEntry, MachineBasicBlock *LoopEnd) const {
4705  MachineBasicBlock::iterator TI = LoopEnd->getFirstTerminator();
4706  // We expect 2 terminators, one conditional and one unconditional.
4707  assert(TI != LoopEnd->end());
4708 
4709  MachineInstr *Branch = &(*TI);
4710  MachineFunction *MF = LoopEnd->getParent();
4711  MachineRegisterInfo &MRI = LoopEnd->getParent()->getRegInfo();
4712 
4713  if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
4714 
4715  unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
4716  unsigned BackEdgeReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
4717  MachineInstrBuilder HeaderPHIBuilder =
4718  BuildMI(*(MF), Branch->getDebugLoc(), get(TargetOpcode::PHI), DstReg);
4719  for (MachineBasicBlock::pred_iterator PI = LoopEntry->pred_begin(),
4720  E = LoopEntry->pred_end();
4721  PI != E; ++PI) {
4722  if (*PI == LoopEnd) {
4723  HeaderPHIBuilder.addReg(BackEdgeReg);
4724  } else {
4725  MachineBasicBlock *PMBB = *PI;
4726  unsigned ZeroReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
4728  ZeroReg, 0);
4729  HeaderPHIBuilder.addReg(ZeroReg);
4730  }
4731  HeaderPHIBuilder.addMBB(*PI);
4732  }
4733  MachineInstr *HeaderPhi = HeaderPHIBuilder;
4734  MachineInstr *SIIFBREAK = BuildMI(*(MF), Branch->getDebugLoc(),
4735  get(AMDGPU::SI_IF_BREAK), BackEdgeReg)
4736  .addReg(DstReg)
4737  .add(Branch->getOperand(0));
4738  MachineInstr *SILOOP =
4739  BuildMI(*(MF), Branch->getDebugLoc(), get(AMDGPU::SI_LOOP))
4740  .addReg(BackEdgeReg)
4741  .addMBB(LoopEntry);
4742 
4743  LoopEntry->insert(LoopEntry->begin(), HeaderPhi);
4744  LoopEnd->erase(TI);
4745  LoopEnd->insert(LoopEnd->end(), SIIFBREAK);
4746  LoopEnd->insert(LoopEnd->end(), SILOOP);
4747  }
4748 }
4749 
4750 ArrayRef<std::pair<int, const char *>>
4751 SIInstrInfo::getSerializableTargetIndices() const {
4752  static const std::pair<int, const char *> TargetIndices[] = {
4753  {AMDGPU::TI_CONSTDATA_START, "amdgpu-constdata-start"},
4754  {AMDGPU::TI_SCRATCH_RSRC_DWORD0, "amdgpu-scratch-rsrc-dword0"},
4755  {AMDGPU::TI_SCRATCH_RSRC_DWORD1, "amdgpu-scratch-rsrc-dword1"},
4756  {AMDGPU::TI_SCRATCH_RSRC_DWORD2, "amdgpu-scratch-rsrc-dword2"},
4757  {AMDGPU::TI_SCRATCH_RSRC_DWORD3, "amdgpu-scratch-rsrc-dword3"}};
4758  return makeArrayRef(TargetIndices);
4759 }
4760 
4761 /// This is used by the post-RA scheduler (SchedulePostRAList.cpp). The
4762 /// post-RA version of misched uses CreateTargetMIHazardRecognizer.
4763 ScheduleHazardRecognizer *SIInstrInfo::CreateTargetPostRAHazardRecognizer(
4764  const InstrItineraryData *II,
4765  const ScheduleDAG *DAG) const {
4766  return new GCNHazardRecognizer(DAG->MF);
4767 }
4768 
4769 /// This is the hazard recognizer used at -O0 by the PostRAHazardRecognizer
4770 /// pass.
4771 ScheduleHazardRecognizer *SIInstrInfo::CreateTargetPostRAHazardRecognizer(
4772  const MachineFunction &MF) const {
4773  return new GCNHazardRecognizer(MF);
4774 }
4775 
4776 std::pair<unsigned, unsigned>
4777 SIInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
4778  return std::make_pair(TF & MO_MASK, TF & ~MO_MASK);
4779 }
4780 
4781 ArrayRef<std::pair<unsigned, const char *>>
4782 SIInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
4783  static const std::pair<unsigned, const char *> TargetFlags[] = {
4784  { MO_GOTPCREL, "amdgpu-gotprel" },
4785  { MO_GOTPCREL32_LO, "amdgpu-gotprel32-lo" },
4786  { MO_GOTPCREL32_HI, "amdgpu-gotprel32-hi" },
4787  { MO_REL32_LO, "amdgpu-rel32-lo" },
4788  { MO_REL32_HI, "amdgpu-rel32-hi" }
4789  };
4790 
4791  return makeArrayRef(TargetFlags);
4792 }
4793 
4794 bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const {
4795  return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY &&
4796  MI.modifiesRegister(AMDGPU::EXEC, &RI);
4797 }
4798 
4799 MachineInstrBuilder
4800 SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
4801  MachineBasicBlock::iterator I,
4802  const DebugLoc &DL,
4803  unsigned DestReg) const {
4804  if (ST.hasAddNoCarry())
4805  return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e64), DestReg);
4806 
4807  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4808  unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
4809  MRI.setRegAllocationHint(UnusedCarry, 0, AMDGPU::VCC);
4810 
4811  return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg)
4812  .addReg(UnusedCarry, RegState::Define | RegState::Dead);
4813 }
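// Callers append the two source operands to the returned builder; on subtargets
// with carry-less adds the plain V_ADD_U32_e64 is used, otherwise a
// V_ADD_I32_e64 with a dead carry-out register pair is emitted.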
4814 
4815 bool SIInstrInfo::isKillTerminator(unsigned Opcode) {
4816  switch (Opcode) {
4817  case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
4818  case AMDGPU::SI_KILL_I1_TERMINATOR:
4819  return true;
4820  default:
4821  return false;
4822  }
4823 }
4824 
4825 const MCInstrDesc &SIInstrInfo::getKillTerminatorFromPseudo(unsigned Opcode) const {
4826  switch (Opcode) {
4827  case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
4828  return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
4829  case AMDGPU::SI_KILL_I1_PSEUDO:
4830  return get(AMDGPU::SI_KILL_I1_TERMINATOR);
4831  default:
4832  llvm_unreachable("invalid opcode, expected SI_KILL_*_PSEUDO");
4833  }
4834 }