LLVM 10.0.0svn
SIFoldOperands.cpp
1 //===-- SIFoldOperands.cpp - Fold operands --- ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 /// \file
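/// SIFoldOperands is a machine-function pass that folds immediates, frame
/// indexes, global addresses, and register copies directly into the uses of
/// virtual registers, and that folds clamp and output-modifier (omod)
/// patterns back into the instructions defining their sources.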
8 //===----------------------------------------------------------------------===//
9 //
10 
11 #include "AMDGPU.h"
12 #include "AMDGPUSubtarget.h"
13 #include "SIInstrInfo.h"
14 #include "SIMachineFunctionInfo.h"
15 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
16 #include "llvm/ADT/DepthFirstIterator.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/CodeGen/MachineFunctionPass.h"
19 #include "llvm/CodeGen/MachineInstrBuilder.h"
20 #include "llvm/CodeGen/MachineRegisterInfo.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/raw_ostream.h"
23 #include "llvm/Target/TargetMachine.h"
24 
25 #define DEBUG_TYPE "si-fold-operands"
26 using namespace llvm;
27 
28 namespace {
29 
30 struct FoldCandidate {
31  MachineInstr *UseMI;
32  union {
33  MachineOperand *OpToFold;
34  uint64_t ImmToFold;
35  int FrameIndexToFold;
36  };
37  int ShrinkOpcode;
38  unsigned char UseOpNo;
39  MachineOperand::MachineOperandType Kind;
40  bool Commuted;
41 
42  FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp,
43  bool Commuted_ = false,
44  int ShrinkOp = -1) :
45  UseMI(MI), OpToFold(nullptr), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
46  Kind(FoldOp->getType()),
47  Commuted(Commuted_) {
48  if (FoldOp->isImm()) {
49  ImmToFold = FoldOp->getImm();
50  } else if (FoldOp->isFI()) {
51  FrameIndexToFold = FoldOp->getIndex();
52  } else {
53  assert(FoldOp->isReg() || FoldOp->isGlobal());
54  OpToFold = FoldOp;
55  }
56  }
57 
58  bool isFI() const {
59  return Kind == MachineOperand::MO_FrameIndex;
60  }
61 
62  bool isImm() const {
63  return Kind == MachineOperand::MO_Immediate;
64  }
65 
66  bool isReg() const {
67  return Kind == MachineOperand::MO_Register;
68  }
69 
70  bool isGlobal() const { return Kind == MachineOperand::MO_GlobalAddress; }
71 
72  bool isCommuted() const {
73  return Commuted;
74  }
75 
76  bool needsShrink() const {
77  return ShrinkOpcode != -1;
78  }
79 
80  int getShrinkOpcode() const {
81  return ShrinkOpcode;
82  }
83 };
84 
85 class SIFoldOperands : public MachineFunctionPass {
86 public:
87  static char ID;
88  MachineRegisterInfo *MRI;
89  const SIInstrInfo *TII;
90  const SIRegisterInfo *TRI;
91  const GCNSubtarget *ST;
92  const SIMachineFunctionInfo *MFI;
93 
94  void foldOperand(MachineOperand &OpToFold,
95  MachineInstr *UseMI,
96  int UseOpIdx,
97  SmallVectorImpl<FoldCandidate> &FoldList,
98  SmallVectorImpl<MachineInstr *> &CopiesToReplace) const;
99 
100  void foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const;
101 
102  const MachineOperand *isClamp(const MachineInstr &MI) const;
103  bool tryFoldClamp(MachineInstr &MI);
104 
105  std::pair<const MachineOperand *, int> isOMod(const MachineInstr &MI) const;
106  bool tryFoldOMod(MachineInstr &MI);
107 
108 public:
109  SIFoldOperands() : MachineFunctionPass(ID) {
110  initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
111  }
112 
113  bool runOnMachineFunction(MachineFunction &MF) override;
114 
115  StringRef getPassName() const override { return "SI Fold Operands"; }
116 
117  void getAnalysisUsage(AnalysisUsage &AU) const override {
118  AU.setPreservesCFG();
119  MachineFunctionPass::getAnalysisUsage(AU);
120  }
121 };
122 
123 } // End anonymous namespace.
124 
125 INITIALIZE_PASS(SIFoldOperands, DEBUG_TYPE,
126  "SI Fold Operands", false, false)
127 
128 char SIFoldOperands::ID = 0;
129 
130 char &llvm::SIFoldOperandsID = SIFoldOperands::ID;
131 
132 // Wrapper around isInlineConstant that understands special cases when
133 // instruction types are replaced during operand folding.
134 static bool isInlineConstantIfFolded(const SIInstrInfo *TII,
135  const MachineInstr &UseMI,
136  unsigned OpNo,
137  const MachineOperand &OpToFold) {
138  if (TII->isInlineConstant(UseMI, OpNo, OpToFold))
139  return true;
140 
141  unsigned Opc = UseMI.getOpcode();
142  switch (Opc) {
143  case AMDGPU::V_MAC_F32_e64:
144  case AMDGPU::V_MAC_F16_e64:
145  case AMDGPU::V_FMAC_F32_e64:
146  case AMDGPU::V_FMAC_F16_e64: {
147  // Special case for mac. Since this is replaced with mad when folded into
148  // src2, we need to check the legality for the final instruction.
149  int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
150  if (static_cast<int>(OpNo) == Src2Idx) {
151  bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64 ||
152  Opc == AMDGPU::V_FMAC_F16_e64;
153  bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64 ||
154  Opc == AMDGPU::V_FMAC_F32_e64;
155 
156  unsigned Opc = IsFMA ?
157  (IsF32 ? AMDGPU::V_FMA_F32 : AMDGPU::V_FMA_F16_gfx9) :
158  (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
159  const MCInstrDesc &MadDesc = TII->get(Opc);
160  return TII->isInlineConstant(OpToFold, MadDesc.OpInfo[OpNo].OperandType);
161  }
162  return false;
163  }
164  default:
165  return false;
166  }
167 }
168 
169 // TODO: Add heuristic that the frame index might not fit in the addressing mode
170 // immediate offset to avoid materializing in loops.
171 static bool frameIndexMayFold(const SIInstrInfo *TII,
172  const MachineInstr &UseMI,
173  int OpNo,
174  const MachineOperand &OpToFold) {
175  return OpToFold.isFI() &&
176  (TII->isMUBUF(UseMI) || TII->isFLATScratch(UseMI)) &&
177  OpNo == AMDGPU::getNamedOperandIdx(UseMI.getOpcode(), AMDGPU::OpName::vaddr);
178 }
179 
180 FunctionPass *llvm::createSIFoldOperandsPass() {
181  return new SIFoldOperands();
182 }
183 
184 static bool updateOperand(FoldCandidate &Fold,
185  const SIInstrInfo &TII,
186  const TargetRegisterInfo &TRI,
187  const GCNSubtarget &ST) {
188  MachineInstr *MI = Fold.UseMI;
189  MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
190  assert(Old.isReg());
191 
192  if (Fold.isImm()) {
193  if (MI->getDesc().TSFlags & SIInstrFlags::IsPacked &&
194  !(MI->getDesc().TSFlags & SIInstrFlags::IsMAI) &&
195  AMDGPU::isInlinableLiteralV216(static_cast<uint16_t>(Fold.ImmToFold),
196  ST.hasInv2PiInlineImm())) {
197  // Set op_sel/op_sel_hi on this operand or bail out if op_sel is
198  // already set.
199  unsigned Opcode = MI->getOpcode();
200  int OpNo = MI->getOperandNo(&Old);
201  int ModIdx = -1;
202  if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0))
203  ModIdx = AMDGPU::OpName::src0_modifiers;
204  else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1))
205  ModIdx = AMDGPU::OpName::src1_modifiers;
206  else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2))
207  ModIdx = AMDGPU::OpName::src2_modifiers;
208  assert(ModIdx != -1);
209  ModIdx = AMDGPU::getNamedOperandIdx(Opcode, ModIdx);
210  MachineOperand &Mod = MI->getOperand(ModIdx);
211  unsigned Val = Mod.getImm();
212  if ((Val & SISrcMods::OP_SEL_0) || !(Val & SISrcMods::OP_SEL_1))
213  return false;
214  // Only apply the following transformation if that operand requires
215  // a packed immediate.
216  switch (TII.get(Opcode).OpInfo[OpNo].OperandType) {
217  case AMDGPU::OPERAND_REG_IMM_V2FP16:
218  case AMDGPU::OPERAND_REG_IMM_V2INT16:
219  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
220  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
221  // If upper part is all zero we do not need op_sel_hi.
222  if (!isUInt<16>(Fold.ImmToFold)) {
223  if (!(Fold.ImmToFold & 0xffff)) {
224  Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0);
225  Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
226  Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff);
227  return true;
228  }
229  Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
230  Old.ChangeToImmediate(Fold.ImmToFold & 0xffff);
231  return true;
232  }
233  break;
234  default:
235  break;
236  }
237  }
238  }
239 
240  if ((Fold.isImm() || Fold.isFI() || Fold.isGlobal()) && Fold.needsShrink()) {
241  MachineBasicBlock *MBB = MI->getParent();
242  auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI, 16);
243  if (Liveness != MachineBasicBlock::LQR_Dead) {
244  LLVM_DEBUG(dbgs() << "Not shrinking " << MI << " due to vcc liveness\n");
245  return false;
246  }
247 
248  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
249  int Op32 = Fold.getShrinkOpcode();
250  MachineOperand &Dst0 = MI->getOperand(0);
251  MachineOperand &Dst1 = MI->getOperand(1);
252  assert(Dst0.isDef() && Dst1.isDef());
253 
254  bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg());
255 
256  const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
257  Register NewReg0 = MRI.createVirtualRegister(Dst0RC);
258 
259  MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
260 
261  if (HaveNonDbgCarryUse) {
262  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg())
263  .addReg(AMDGPU::VCC, RegState::Kill);
264  }
265 
266  // Keep the old instruction around to avoid breaking iterators, but
267  // replace it with a dummy instruction to remove uses.
268  //
269  // FIXME: We should not invert how this pass looks at operands to avoid
270  // this. Should track set of foldable movs instead of looking for uses
271  // when looking at a use.
272  Dst0.setReg(NewReg0);
273  for (unsigned I = MI->getNumOperands() - 1; I > 0; --I)
274  MI->RemoveOperand(I);
275  MI->setDesc(TII.get(AMDGPU::IMPLICIT_DEF));
276 
277  if (Fold.isCommuted())
278  TII.commuteInstruction(*Inst32, false);
279  return true;
280  }
281 
282  assert(!Fold.needsShrink() && "not handled");
283 
284  if (Fold.isImm()) {
285  Old.ChangeToImmediate(Fold.ImmToFold);
286  return true;
287  }
288 
289  if (Fold.isGlobal()) {
290  Old.ChangeToGA(Fold.OpToFold->getGlobal(), Fold.OpToFold->getOffset(),
291  Fold.OpToFold->getTargetFlags());
292  return true;
293  }
294 
295  if (Fold.isFI()) {
296  Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
297  return true;
298  }
299 
300  MachineOperand *New = Fold.OpToFold;
301  Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);
302  Old.setIsUndef(New->isUndef());
303  return true;
304 }
305 
306 static bool isUseMIInFoldList(ArrayRef<FoldCandidate> FoldList,
307  const MachineInstr *MI) {
308  for (auto Candidate : FoldList) {
309  if (Candidate.UseMI == MI)
310  return true;
311  }
312  return false;
313 }
314 
315 static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
316  MachineInstr *MI, unsigned OpNo,
317  MachineOperand *OpToFold,
318  const SIInstrInfo *TII) {
319  if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {
320  // Special case for v_mac_{f16, f32}_e64 if we are trying to fold into src2
321  unsigned Opc = MI->getOpcode();
322  if ((Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
323  Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64) &&
324  (int)OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)) {
325  bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64 ||
326  Opc == AMDGPU::V_FMAC_F16_e64;
327  bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64 ||
328  Opc == AMDGPU::V_FMAC_F32_e64;
329  unsigned NewOpc = IsFMA ?
330  (IsF32 ? AMDGPU::V_FMA_F32 : AMDGPU::V_FMA_F16_gfx9) :
331  (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
332 
333  // Check if changing this to a v_mad_{f16, f32} instruction will allow us
334  // to fold the operand.
335  MI->setDesc(TII->get(NewOpc));
336  bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold, TII);
337  if (FoldAsMAD) {
338  MI->untieRegOperand(OpNo);
339  return true;
340  }
341  MI->setDesc(TII->get(Opc));
342  }
343 
344  // Special case for s_setreg_b32
345  if (Opc == AMDGPU::S_SETREG_B32 && OpToFold->isImm()) {
346  MI->setDesc(TII->get(AMDGPU::S_SETREG_IMM32_B32));
347  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
348  return true;
349  }
350 
351  // If we are already folding into another operand of MI, then
352  // we can't commute the instruction, otherwise we risk making the
353  // other fold illegal.
354  if (isUseMIInFoldList(FoldList, MI))
355  return false;
356 
357  unsigned CommuteOpNo = OpNo;
358 
359  // Operand is not legal, so try to commute the instruction to
360  // see if this makes it possible to fold.
361  unsigned CommuteIdx0 = TargetInstrInfo::CommuteAnyOperandIndex;
362  unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
363  bool CanCommute = TII->findCommutedOpIndices(*MI, CommuteIdx0, CommuteIdx1);
364 
365  if (CanCommute) {
366  if (CommuteIdx0 == OpNo)
367  CommuteOpNo = CommuteIdx1;
368  else if (CommuteIdx1 == OpNo)
369  CommuteOpNo = CommuteIdx0;
370  }
371 
372 
373  // One of operands might be an Imm operand, and OpNo may refer to it after
374  // the call of commuteInstruction() below. Such situations are avoided
375  // here explicitly as OpNo must be a register operand to be a candidate
376  // for memory folding.
377  if (CanCommute && (!MI->getOperand(CommuteIdx0).isReg() ||
378  !MI->getOperand(CommuteIdx1).isReg()))
379  return false;
380 
381  if (!CanCommute ||
382  !TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1))
383  return false;
384 
385  if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) {
386  if ((Opc == AMDGPU::V_ADD_I32_e64 ||
387  Opc == AMDGPU::V_SUB_I32_e64 ||
388  Opc == AMDGPU::V_SUBREV_I32_e64) && // FIXME
389  (OpToFold->isImm() || OpToFold->isFI() || OpToFold->isGlobal())) {
390  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
391 
392  // Verify the other operand is a VGPR, otherwise we would violate the
393  // constant bus restriction.
394  unsigned OtherIdx = CommuteOpNo == CommuteIdx0 ? CommuteIdx1 : CommuteIdx0;
395  MachineOperand &OtherOp = MI->getOperand(OtherIdx);
396  if (!OtherOp.isReg() ||
397  !TII->getRegisterInfo().isVGPR(MRI, OtherOp.getReg()))
398  return false;
399 
400  assert(MI->getOperand(1).isDef());
401 
402  // Make sure to get the 32-bit version of the commuted opcode.
403  unsigned MaybeCommutedOpc = MI->getOpcode();
404  int Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc);
405 
406  FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true,
407  Op32));
408  return true;
409  }
410 
411  TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1);
412  return false;
413  }
414 
415  FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true));
416  return true;
417  }
418 
419  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
420  return true;
421 }
422 
423 // If the use operand doesn't care about the value, this may be an operand only
424 // used for register indexing, in which case it is unsafe to fold.
425 static bool isUseSafeToFold(const SIInstrInfo *TII,
426  const MachineInstr &MI,
427  const MachineOperand &UseMO) {
428  return !UseMO.isUndef() && !TII->isSDWA(MI);
429  //return !MI.hasRegisterImplicitUseOperand(UseMO.getReg());
430 }
431 
432 static bool tryToFoldACImm(const SIInstrInfo *TII,
433  const MachineOperand &OpToFold,
434  MachineInstr *UseMI,
435  unsigned UseOpIdx,
436  SmallVectorImpl<FoldCandidate> &FoldList) {
437  const MCInstrDesc &Desc = UseMI->getDesc();
438  const MCOperandInfo *OpInfo = Desc.OpInfo;
439  if (!OpInfo || UseOpIdx >= Desc.getNumOperands())
440  return false;
441 
442  uint8_t OpTy = OpInfo[UseOpIdx].OperandType;
443  if (OpTy < AMDGPU::OPERAND_REG_INLINE_AC_FIRST ||
444  OpTy > AMDGPU::OPERAND_REG_INLINE_AC_LAST)
445  return false;
446 
447  if (OpToFold.isImm() && TII->isInlineConstant(OpToFold, OpTy) &&
448  TII->isOperandLegal(*UseMI, UseOpIdx, &OpToFold)) {
449  UseMI->getOperand(UseOpIdx).ChangeToImmediate(OpToFold.getImm());
450  return true;
451  }
452 
453  if (!OpToFold.isReg())
454  return false;
455 
456  Register UseReg = OpToFold.getReg();
457  if (!Register::isVirtualRegister(UseReg))
458  return false;
459 
460  if (llvm::find_if(FoldList, [UseMI](const FoldCandidate &FC) {
461  return FC.UseMI == UseMI; }) != FoldList.end())
462  return false;
463 
464  MachineRegisterInfo &MRI = UseMI->getParent()->getParent()->getRegInfo();
465  const MachineInstr *Def = MRI.getUniqueVRegDef(UseReg);
466  if (!Def || !Def->isRegSequence())
467  return false;
468 
469  int64_t Imm;
470  MachineOperand *Op = nullptr;
471  for (unsigned I = 1, E = Def->getNumExplicitOperands(); I < E; I += 2) {
472  const MachineOperand &Sub = Def->getOperand(I);
473  if (!Sub.isReg() || Sub.getSubReg())
474  return false;
475  MachineInstr *SubDef = MRI.getUniqueVRegDef(Sub.getReg());
476  while (SubDef && !SubDef->isMoveImmediate() &&
477  !SubDef->getOperand(1).isImm() && TII->isFoldableCopy(*SubDef))
478  SubDef = MRI.getUniqueVRegDef(SubDef->getOperand(1).getReg());
479  if (!SubDef || !SubDef->isMoveImmediate() || !SubDef->getOperand(1).isImm())
480  return false;
481  Op = &SubDef->getOperand(1);
482  auto SubImm = Op->getImm();
483  if (I == 1) {
484  if (!TII->isInlineConstant(SubDef->getOperand(1), OpTy))
485  return false;
486 
487  Imm = SubImm;
488  continue;
489  }
490  if (Imm != SubImm)
491  return false; // Can only fold splat constants
492  }
493 
494  if (!TII->isOperandLegal(*UseMI, UseOpIdx, Op))
495  return false;
496 
497  FoldList.push_back(FoldCandidate(UseMI, UseOpIdx, Op));
498  return true;
499 }
500 
501 void SIFoldOperands::foldOperand(
502  MachineOperand &OpToFold,
503  MachineInstr *UseMI,
504  int UseOpIdx,
505  SmallVectorImpl<FoldCandidate> &FoldList,
506  SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
507  const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
508 
509  if (!isUseSafeToFold(TII, *UseMI, UseOp))
510  return;
511 
512  // FIXME: Fold operands with subregs.
513  if (UseOp.isReg() && OpToFold.isReg()) {
514  if (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister)
515  return;
516 
517  // Don't fold subregister extracts into tied operands unless it is a full
518  // copy, since a subregister use tied to a full register def doesn't really
519  // make sense. e.g. don't fold:
520  //
521  // %1 = COPY %0:sub1
522  // %2<tied3> = V_MAC_{F16, F32} %3, %4, %1<tied0>
523  //
524  // into
525  // %2<tied3> = V_MAC_{F16, F32} %3, %4, %0:sub1<tied0>
526  if (UseOp.isTied() && OpToFold.getSubReg() != AMDGPU::NoSubRegister)
527  return;
528  }
529 
530  // Special case for REG_SEQUENCE: We can't fold literals into
531  // REG_SEQUENCE instructions, so we have to fold them into the
532  // uses of REG_SEQUENCE.
533  if (UseMI->isRegSequence()) {
534  Register RegSeqDstReg = UseMI->getOperand(0).getReg();
535  unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
536 
537  MachineRegisterInfo::use_iterator Next;
538  for (MachineRegisterInfo::use_iterator
539  RSUse = MRI->use_begin(RegSeqDstReg), RSE = MRI->use_end();
540  RSUse != RSE; RSUse = Next) {
541  Next = std::next(RSUse);
542 
543  MachineInstr *RSUseMI = RSUse->getParent();
544 
545  if (tryToFoldACImm(TII, UseMI->getOperand(0), RSUseMI,
546  RSUse.getOperandNo(), FoldList))
547  continue;
548 
549  if (RSUse->getSubReg() != RegSeqDstSubReg)
550  continue;
551 
552  foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
553  CopiesToReplace);
554  }
555 
556  return;
557  }
558 
559  if (tryToFoldACImm(TII, OpToFold, UseMI, UseOpIdx, FoldList))
560  return;
561 
562  if (frameIndexMayFold(TII, *UseMI, UseOpIdx, OpToFold)) {
563  // Sanity check that this is a stack access.
564  // FIXME: Should probably use stack pseudos before frame lowering.
565  MachineOperand *SOff = TII->getNamedOperand(*UseMI, AMDGPU::OpName::soffset);
566  if (!SOff->isReg() || (SOff->getReg() != MFI->getScratchWaveOffsetReg() &&
567  SOff->getReg() != MFI->getStackPtrOffsetReg()))
568  return;
569 
570  if (TII->getNamedOperand(*UseMI, AMDGPU::OpName::srsrc)->getReg() !=
571  MFI->getScratchRSrcReg())
572  return;
573 
574  // A frame index will resolve to a positive constant, so it should always be
575  // safe to fold the addressing mode, even pre-GFX9.
576  UseMI->getOperand(UseOpIdx).ChangeToFrameIndex(OpToFold.getIndex());
577  SOff->setReg(MFI->getStackPtrOffsetReg());
578  return;
579  }
580 
581  bool FoldingImmLike =
582  OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
583 
584  if (FoldingImmLike && UseMI->isCopy()) {
585  Register DestReg = UseMI->getOperand(0).getReg();
586 
587  // Don't fold into a copy to a physical register. Doing so would interfere
588  // with the register coalescer's logic which would avoid redundant
589  // initializations.
590  if (DestReg.isPhysical())
591  return;
592 
593  const TargetRegisterClass *DestRC = MRI->getRegClass(DestReg);
594 
595  Register SrcReg = UseMI->getOperand(1).getReg();
596  if (SrcReg.isVirtual()) { // XXX - This can be an assert?
597  const TargetRegisterClass * SrcRC = MRI->getRegClass(SrcReg);
598  if (TRI->isSGPRClass(SrcRC) && TRI->hasVectorRegisters(DestRC)) {
599  MachineRegisterInfo::use_iterator NextUse;
600  SmallVector<FoldCandidate, 4> CopyUses;
601  for (MachineRegisterInfo::use_iterator
602  Use = MRI->use_begin(DestReg), E = MRI->use_end();
603  Use != E; Use = NextUse) {
604  NextUse = std::next(Use);
605  FoldCandidate FC = FoldCandidate(Use->getParent(),
606  Use.getOperandNo(), &UseMI->getOperand(1));
607  CopyUses.push_back(FC);
608  }
609  for (auto & F : CopyUses) {
610  foldOperand(*F.OpToFold, F.UseMI, F.UseOpNo,
611  FoldList, CopiesToReplace);
612  }
613  }
614  }
615 
616  if (DestRC == &AMDGPU::AGPR_32RegClass &&
617  TII->isInlineConstant(OpToFold, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {
618  UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_WRITE_B32));
619  UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm());
620  CopiesToReplace.push_back(UseMI);
621  return;
622  }
623 
624  // In order to fold immediates into copies, we need to change the
625  // copy to a MOV.
626 
627  unsigned MovOp = TII->getMovOpcode(DestRC);
628  if (MovOp == AMDGPU::COPY)
629  return;
630 
631  UseMI->setDesc(TII->get(MovOp));
632  MachineInstr::mop_iterator ImpOpI = UseMI->implicit_operands().begin();
633  MachineInstr::mop_iterator ImpOpE = UseMI->implicit_operands().end();
634  while (ImpOpI != ImpOpE) {
635  MachineInstr::mop_iterator Tmp = ImpOpI;
636  ImpOpI++;
637  UseMI->RemoveOperand(UseMI->getOperandNo(Tmp));
638  }
639  CopiesToReplace.push_back(UseMI);
640  } else {
641  if (UseMI->isCopy() && OpToFold.isReg() &&
642  UseMI->getOperand(0).getReg().isVirtual() &&
643  TRI->isVectorRegister(*MRI, UseMI->getOperand(0).getReg()) &&
644  TRI->isVectorRegister(*MRI, UseMI->getOperand(1).getReg()) &&
645  !UseMI->getOperand(1).getSubReg()) {
646  unsigned Size = TII->getOpSize(*UseMI, 1);
647  UseMI->getOperand(1).setReg(OpToFold.getReg());
648  UseMI->getOperand(1).setSubReg(OpToFold.getSubReg());
649  UseMI->getOperand(1).setIsKill(false);
650  CopiesToReplace.push_back(UseMI);
651  OpToFold.setIsKill(false);
652  if (Size != 4)
653  return;
654  if (TRI->isAGPR(*MRI, UseMI->getOperand(0).getReg()) &&
655  TRI->isVGPR(*MRI, UseMI->getOperand(1).getReg()))
656  UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_WRITE_B32));
657  else if (TRI->isVGPR(*MRI, UseMI->getOperand(0).getReg()) &&
658  TRI->isAGPR(*MRI, UseMI->getOperand(1).getReg()))
659  UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_READ_B32));
660  return;
661  }
662 
663  unsigned UseOpc = UseMI->getOpcode();
664  if (UseOpc == AMDGPU::V_READFIRSTLANE_B32 ||
665  (UseOpc == AMDGPU::V_READLANE_B32 &&
666  (int)UseOpIdx ==
667  AMDGPU::getNamedOperandIdx(UseOpc, AMDGPU::OpName::src0))) {
668  // %vgpr = V_MOV_B32 imm
669  // %sgpr = V_READFIRSTLANE_B32 %vgpr
670  // =>
671  // %sgpr = S_MOV_B32 imm
672  if (FoldingImmLike) {
673  if (execMayBeModifiedBeforeUse(*MRI,
674  UseMI->getOperand(UseOpIdx).getReg(),
675  *OpToFold.getParent(),
676  *UseMI))
677  return;
678 
679  UseMI->setDesc(TII->get(AMDGPU::S_MOV_B32));
680 
681  // FIXME: ChangeToImmediate should clear subreg
682  UseMI->getOperand(1).setSubReg(0);
683  if (OpToFold.isImm())
684  UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm());
685  else
686  UseMI->getOperand(1).ChangeToFrameIndex(OpToFold.getIndex());
687  UseMI->RemoveOperand(2); // Remove exec read (or src1 for readlane)
688  return;
689  }
690 
691  if (OpToFold.isReg() && TRI->isSGPRReg(*MRI, OpToFold.getReg())) {
692  if (execMayBeModifiedBeforeUse(*MRI,
693  UseMI->getOperand(UseOpIdx).getReg(),
694  *OpToFold.getParent(),
695  *UseMI))
696  return;
697 
698  // %vgpr = COPY %sgpr0
699  // %sgpr1 = V_READFIRSTLANE_B32 %vgpr
700  // =>
701  // %sgpr1 = COPY %sgpr0
702  UseMI->setDesc(TII->get(AMDGPU::COPY));
703  UseMI->getOperand(1).setReg(OpToFold.getReg());
704  UseMI->getOperand(1).setSubReg(OpToFold.getSubReg());
705  UseMI->getOperand(1).setIsKill(false);
706  UseMI->RemoveOperand(2); // Remove exec read (or src1 for readlane)
707  return;
708  }
709  }
710 
711  const MCInstrDesc &UseDesc = UseMI->getDesc();
712 
713  // Don't fold into target independent nodes. Target independent opcodes
714  // don't have defined register classes.
715  if (UseDesc.isVariadic() ||
716  UseOp.isImplicit() ||
717  UseDesc.OpInfo[UseOpIdx].RegClass == -1)
718  return;
719  }
720 
721  if (!FoldingImmLike) {
722  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
723 
724  // FIXME: We could try to change the instruction from 64-bit to 32-bit
725  // to enable more folding opportunities. The shrink operands pass
726  // already does this.
727  return;
728  }
729 
730 
731  const MCInstrDesc &FoldDesc = OpToFold.getParent()->getDesc();
732  const TargetRegisterClass *FoldRC =
733  TRI->getRegClass(FoldDesc.OpInfo[0].RegClass);
734 
735  // Split 64-bit constants into 32-bits for folding.
736  if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) {
737  Register UseReg = UseOp.getReg();
738  const TargetRegisterClass *UseRC = MRI->getRegClass(UseReg);
739 
740  if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64)
741  return;
742 
743  APInt Imm(64, OpToFold.getImm());
744  if (UseOp.getSubReg() == AMDGPU::sub0) {
745  Imm = Imm.getLoBits(32);
746  } else {
747  assert(UseOp.getSubReg() == AMDGPU::sub1);
748  Imm = Imm.getHiBits(32);
749  }
750 
751  MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
752  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
753  return;
754  }
755 
756 
757 
758  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
759 }
760 
761 static bool evalBinaryInstruction(unsigned Opcode, int32_t &Result,
762  uint32_t LHS, uint32_t RHS) {
763  switch (Opcode) {
764  case AMDGPU::V_AND_B32_e64:
765  case AMDGPU::V_AND_B32_e32:
766  case AMDGPU::S_AND_B32:
767  Result = LHS & RHS;
768  return true;
769  case AMDGPU::V_OR_B32_e64:
770  case AMDGPU::V_OR_B32_e32:
771  case AMDGPU::S_OR_B32:
772  Result = LHS | RHS;
773  return true;
774  case AMDGPU::V_XOR_B32_e64:
775  case AMDGPU::V_XOR_B32_e32:
776  case AMDGPU::S_XOR_B32:
777  Result = LHS ^ RHS;
778  return true;
779  case AMDGPU::V_LSHL_B32_e64:
780  case AMDGPU::V_LSHL_B32_e32:
781  case AMDGPU::S_LSHL_B32:
782  // The instruction ignores the high bits for out of bounds shifts.
783  Result = LHS << (RHS & 31);
784  return true;
785  case AMDGPU::V_LSHLREV_B32_e64:
786  case AMDGPU::V_LSHLREV_B32_e32:
787  Result = RHS << (LHS & 31);
788  return true;
789  case AMDGPU::V_LSHR_B32_e64:
790  case AMDGPU::V_LSHR_B32_e32:
791  case AMDGPU::S_LSHR_B32:
792  Result = LHS >> (RHS & 31);
793  return true;
794  case AMDGPU::V_LSHRREV_B32_e64:
795  case AMDGPU::V_LSHRREV_B32_e32:
796  Result = RHS >> (LHS & 31);
797  return true;
798  case AMDGPU::V_ASHR_I32_e64:
799  case AMDGPU::V_ASHR_I32_e32:
800  case AMDGPU::S_ASHR_I32:
801  Result = static_cast<int32_t>(LHS) >> (RHS & 31);
802  return true;
803  case AMDGPU::V_ASHRREV_I32_e64:
804  case AMDGPU::V_ASHRREV_I32_e32:
805  Result = static_cast<int32_t>(RHS) >> (LHS & 31);
806  return true;
807  default:
808  return false;
809  }
810 }
811 
812 static unsigned getMovOpc(bool IsScalar) {
813  return IsScalar ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
814 }
815 
816 /// Remove any leftover implicit operands from mutating the instruction. e.g.
817 /// if we replace an s_and_b32 with a copy, we don't need the implicit scc def
818 /// anymore.
819 static void stripExtraCopyOperands(MachineInstr &MI) {
820  const MCInstrDesc &Desc = MI.getDesc();
821  unsigned NumOps = Desc.getNumOperands() +
822  Desc.getNumImplicitUses() +
823  Desc.getNumImplicitDefs();
824 
825  for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
826  MI.RemoveOperand(I);
827 }
828 
829 static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) {
830  MI.setDesc(NewDesc);
831  stripExtraCopyOperands(MI);
832 }
833 
834 static MachineOperand *getImmOrMaterializedImm(MachineRegisterInfo &MRI,
835  MachineOperand &Op) {
836  if (Op.isReg()) {
837  // If this has a subregister, it obviously is a register source.
838  if (Op.getSubReg() != AMDGPU::NoSubRegister ||
839  !Register::isVirtualRegister(Op.getReg()))
840  return &Op;
841 
842  MachineInstr *Def = MRI.getVRegDef(Op.getReg());
843  if (Def && Def->isMoveImmediate()) {
844  MachineOperand &ImmSrc = Def->getOperand(1);
845  if (ImmSrc.isImm())
846  return &ImmSrc;
847  }
848  }
849 
850  return &Op;
851 }
852 
853 // Try to simplify operations with a constant that may appear after instruction
854 // selection.
855 // TODO: See if a frame index with a fixed offset can fold.
856 static bool tryConstantFoldOp(MachineRegisterInfo &MRI,
857  const SIInstrInfo *TII,
858  MachineInstr *MI,
859  MachineOperand *ImmOp) {
860  unsigned Opc = MI->getOpcode();
861  if (Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
862  Opc == AMDGPU::S_NOT_B32) {
863  MI->getOperand(1).ChangeToImmediate(~ImmOp->getImm());
864  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_NOT_B32)));
865  return true;
866  }
867 
868  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
869  if (Src1Idx == -1)
870  return false;
871 
872  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
873  MachineOperand *Src0 = getImmOrMaterializedImm(MRI, MI->getOperand(Src0Idx));
874  MachineOperand *Src1 = getImmOrMaterializedImm(MRI, MI->getOperand(Src1Idx));
875 
876  if (!Src0->isImm() && !Src1->isImm())
877  return false;
878 
879  if (MI->getOpcode() == AMDGPU::V_LSHL_OR_B32) {
880  if (Src0->isImm() && Src0->getImm() == 0) {
881  // v_lshl_or_b32 0, X, Y -> copy Y
882  // v_lshl_or_b32 0, X, K -> v_mov_b32 K
883  bool UseCopy = TII->getNamedOperand(*MI, AMDGPU::OpName::src2)->isReg();
884  MI->RemoveOperand(Src1Idx);
885  MI->RemoveOperand(Src0Idx);
886 
887  MI->setDesc(TII->get(UseCopy ? AMDGPU::COPY : AMDGPU::V_MOV_B32_e32));
888  return true;
889  }
890  }
891 
892  // and k0, k1 -> v_mov_b32 (k0 & k1)
893  // or k0, k1 -> v_mov_b32 (k0 | k1)
894  // xor k0, k1 -> v_mov_b32 (k0 ^ k1)
895  if (Src0->isImm() && Src1->isImm()) {
896  int32_t NewImm;
897  if (!evalBinaryInstruction(Opc, NewImm, Src0->getImm(), Src1->getImm()))
898  return false;
899 
900  const SIRegisterInfo &TRI = TII->getRegisterInfo();
901  bool IsSGPR = TRI.isSGPRReg(MRI, MI->getOperand(0).getReg());
902 
903  // Be careful to change the right operand, src0 may belong to a different
904  // instruction.
905  MI->getOperand(Src0Idx).ChangeToImmediate(NewImm);
906  MI->RemoveOperand(Src1Idx);
907  mutateCopyOp(*MI, TII->get(getMovOpc(IsSGPR)));
908  return true;
909  }
910 
911  if (!MI->isCommutable())
912  return false;
913 
914  if (Src0->isImm() && !Src1->isImm()) {
915  std::swap(Src0, Src1);
916  std::swap(Src0Idx, Src1Idx);
917  }
918 
919  int32_t Src1Val = static_cast<int32_t>(Src1->getImm());
920  if (Opc == AMDGPU::V_OR_B32_e64 ||
921  Opc == AMDGPU::V_OR_B32_e32 ||
922  Opc == AMDGPU::S_OR_B32) {
923  if (Src1Val == 0) {
924  // y = or x, 0 => y = copy x
925  MI->RemoveOperand(Src1Idx);
926  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
927  } else if (Src1Val == -1) {
928  // y = or x, -1 => y = v_mov_b32 -1
929  MI->RemoveOperand(Src1Idx);
930  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_OR_B32)));
931  } else
932  return false;
933 
934  return true;
935  }
936 
937  if (MI->getOpcode() == AMDGPU::V_AND_B32_e64 ||
938  MI->getOpcode() == AMDGPU::V_AND_B32_e32 ||
939  MI->getOpcode() == AMDGPU::S_AND_B32) {
940  if (Src1Val == 0) {
941  // y = and x, 0 => y = v_mov_b32 0
942  MI->RemoveOperand(Src0Idx);
943  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_AND_B32)));
944  } else if (Src1Val == -1) {
945  // y = and x, -1 => y = copy x
946  MI->RemoveOperand(Src1Idx);
947  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
948  stripExtraCopyOperands(*MI);
949  } else
950  return false;
951 
952  return true;
953  }
954 
955  if (MI->getOpcode() == AMDGPU::V_XOR_B32_e64 ||
956  MI->getOpcode() == AMDGPU::V_XOR_B32_e32 ||
957  MI->getOpcode() == AMDGPU::S_XOR_B32) {
958  if (Src1Val == 0) {
959  // y = xor x, 0 => y = copy x
960  MI->RemoveOperand(Src1Idx);
961  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
962  return true;
963  }
964  }
965 
966  return false;
967 }
968 
969 // Try to fold an instruction into a simpler one
970 static bool tryFoldInst(const SIInstrInfo *TII,
971  MachineInstr *MI) {
972  unsigned Opc = MI->getOpcode();
973 
974  if (Opc == AMDGPU::V_CNDMASK_B32_e32 ||
975  Opc == AMDGPU::V_CNDMASK_B32_e64 ||
976  Opc == AMDGPU::V_CNDMASK_B64_PSEUDO) {
977  const MachineOperand *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
978  const MachineOperand *Src1 = TII->getNamedOperand(*MI, AMDGPU::OpName::src1);
979  int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
980  int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
981  if (Src1->isIdenticalTo(*Src0) &&
982  (Src1ModIdx == -1 || !MI->getOperand(Src1ModIdx).getImm()) &&
983  (Src0ModIdx == -1 || !MI->getOperand(Src0ModIdx).getImm())) {
984  LLVM_DEBUG(dbgs() << "Folded " << *MI << " into ");
985  auto &NewDesc =
986  TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY : getMovOpc(false));
987  int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
988  if (Src2Idx != -1)
989  MI->RemoveOperand(Src2Idx);
990  MI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1));
991  if (Src1ModIdx != -1)
992  MI->RemoveOperand(Src1ModIdx);
993  if (Src0ModIdx != -1)
994  MI->RemoveOperand(Src0ModIdx);
995  mutateCopyOp(*MI, NewDesc);
996  LLVM_DEBUG(dbgs() << *MI << '\n');
997  return true;
998  }
999  }
1000 
1001  return false;
1002 }
1003 
1004 void SIFoldOperands::foldInstOperand(MachineInstr &MI,
1005  MachineOperand &OpToFold) const {
1006  // We need to mutate the operands of new mov instructions to add implicit
1007  // uses of EXEC, but adding them invalidates the use_iterator, so defer
1008  // this.
1009  SmallVector<MachineInstr *, 4> CopiesToReplace;
1010  SmallVector<FoldCandidate, 4> FoldList;
1011  MachineOperand &Dst = MI.getOperand(0);
1012 
1013  bool FoldingImm = OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
1014  if (FoldingImm) {
1015  unsigned NumLiteralUses = 0;
1016  MachineOperand *NonInlineUse = nullptr;
1017  int NonInlineUseOpNo = -1;
1018 
1019  MachineRegisterInfo::use_iterator NextUse;
1020  for (MachineRegisterInfo::use_iterator
1021  Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
1022  Use != E; Use = NextUse) {
1023  NextUse = std::next(Use);
1024  MachineInstr *UseMI = Use->getParent();
1025  unsigned OpNo = Use.getOperandNo();
1026 
1027  // Folding the immediate may reveal operations that can be constant
1028  // folded or replaced with a copy. This can happen for example after
1029  // frame indices are lowered to constants or from splitting 64-bit
1030  // constants.
1031  //
1032  // We may also encounter cases where one or both operands are
1033  // immediates materialized into a register, which would ordinarily not
1034  // be folded due to multiple uses or operand constraints.
1035 
1036  if (OpToFold.isImm() && tryConstantFoldOp(*MRI, TII, UseMI, &OpToFold)) {
1037  LLVM_DEBUG(dbgs() << "Constant folded " << *UseMI << '\n');
1038 
1039  // Some constant folding cases change the same immediate's use to a new
1040  // instruction, e.g. and x, 0 -> 0. Make sure we re-visit the user
1041  // again. The same constant folded instruction could also have a second
1042  // use operand.
1043  NextUse = MRI->use_begin(Dst.getReg());
1044  FoldList.clear();
1045  continue;
1046  }
1047 
1048  // Try to fold any inline immediate uses, and then only fold other
1049  // constants if they have one use.
1050  //
1051  // The legality of the inline immediate must be checked based on the use
1052  // operand, not the defining instruction, because 32-bit instructions
1053  // with 32-bit inline immediate sources may be used to materialize
1054  // constants used in 16-bit operands.
1055  //
1056  // e.g. it is unsafe to fold:
1057  // s_mov_b32 s0, 1.0 // materializes 0x3f800000
1058  // v_add_f16 v0, v1, s0 // 1.0 f16 inline immediate sees 0x00003c00
1059 
1060  // Folding immediates with more than one use will increase program size.
1061  // FIXME: This will also reduce register usage, which may be better
1062  // in some cases. A better heuristic is needed.
1063  if (isInlineConstantIfFolded(TII, *UseMI, OpNo, OpToFold)) {
1064  foldOperand(OpToFold, UseMI, OpNo, FoldList, CopiesToReplace);
1065  } else if (frameIndexMayFold(TII, *UseMI, OpNo, OpToFold)) {
1066  foldOperand(OpToFold, UseMI, OpNo, FoldList,
1067  CopiesToReplace);
1068  } else {
1069  if (++NumLiteralUses == 1) {
1070  NonInlineUse = &*Use;
1071  NonInlineUseOpNo = OpNo;
1072  }
1073  }
1074  }
1075 
1076  if (NumLiteralUses == 1) {
1077  MachineInstr *UseMI = NonInlineUse->getParent();
1078  foldOperand(OpToFold, UseMI, NonInlineUseOpNo, FoldList, CopiesToReplace);
1079  }
1080  } else {
1081  // Folding register.
1082  SmallVector<MachineRegisterInfo::use_iterator, 4> UsesToProcess;
1083  for (MachineRegisterInfo::use_iterator
1084  Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
1085  Use != E; ++Use) {
1086  UsesToProcess.push_back(Use);
1087  }
1088  for (auto U : UsesToProcess) {
1089  MachineInstr *UseMI = U->getParent();
1090 
1091  foldOperand(OpToFold, UseMI, U.getOperandNo(),
1092  FoldList, CopiesToReplace);
1093  }
1094  }
1095 
1096  MachineFunction *MF = MI.getParent()->getParent();
1097  // Make sure we add EXEC uses to any new v_mov instructions created.
1098  for (MachineInstr *Copy : CopiesToReplace)
1099  Copy->addImplicitDefUseOperands(*MF);
1100 
1101  for (FoldCandidate &Fold : FoldList) {
1102  if (Fold.isReg() && Register::isVirtualRegister(Fold.OpToFold->getReg())) {
1103  Register Reg = Fold.OpToFold->getReg();
1104  MachineInstr *DefMI = Fold.OpToFold->getParent();
1105  if (DefMI->readsRegister(AMDGPU::EXEC, TRI) &&
1106  execMayBeModifiedBeforeUse(*MRI, Reg, *DefMI, *Fold.UseMI))
1107  continue;
1108  }
1109  if (updateOperand(Fold, *TII, *TRI, *ST)) {
1110  // Clear kill flags.
1111  if (Fold.isReg()) {
1112  assert(Fold.OpToFold && Fold.OpToFold->isReg());
1113  // FIXME: Probably shouldn't bother trying to fold if not an
1114  // SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR
1115  // copies.
1116  MRI->clearKillFlags(Fold.OpToFold->getReg());
1117  }
1118  LLVM_DEBUG(dbgs() << "Folded source from " << MI << " into OpNo "
1119  << static_cast<int>(Fold.UseOpNo) << " of "
1120  << *Fold.UseMI << '\n');
1121  tryFoldInst(TII, Fold.UseMI);
1122  } else if (Fold.isCommuted()) {
1123  // Restoring instruction's original operand order if fold has failed.
1124  TII->commuteInstruction(*Fold.UseMI, false);
1125  }
1126  }
1127 }
1128 
1129 // Clamp patterns are canonically selected to v_max_* instructions, so only
1130 // handle them.
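// A schematic example of the rewrite performed below (the trailing operand
// pair is clamp, omod; register names are placeholders):
//   %0 = V_ADD_F32_e64 0, %x, 0, %y, 0, 0
//   %1 = V_MAX_F32_e64 0, %0, 0, %0, 1, 0
// =>
//   %0 = V_ADD_F32_e64 0, %x, 0, %y, 1, 0   (all uses of %1 rewritten to %0)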
1131 const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
1132  unsigned Op = MI.getOpcode();
1133  switch (Op) {
1134  case AMDGPU::V_MAX_F32_e64:
1135  case AMDGPU::V_MAX_F16_e64:
1136  case AMDGPU::V_MAX_F64:
1137  case AMDGPU::V_PK_MAX_F16: {
1138  if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())
1139  return nullptr;
1140 
1141  // Make sure sources are identical.
1142  const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1143  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
1144  if (!Src0->isReg() || !Src1->isReg() ||
1145  Src0->getReg() != Src1->getReg() ||
1146  Src0->getSubReg() != Src1->getSubReg() ||
1147  Src0->getSubReg() != AMDGPU::NoSubRegister)
1148  return nullptr;
1149 
1150  // Can't fold up if we have modifiers.
1151  if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
1152  return nullptr;
1153 
1154  unsigned Src0Mods
1155  = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm();
1156  unsigned Src1Mods
1157  = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm();
1158 
1159  // Having a 0 op_sel_hi would require swizzling the output in the source
1160  // instruction, which we can't do.
1161  unsigned UnsetMods = (Op == AMDGPU::V_PK_MAX_F16) ? SISrcMods::OP_SEL_1
1162  : 0u;
1163  if (Src0Mods != UnsetMods && Src1Mods != UnsetMods)
1164  return nullptr;
1165  return Src0;
1166  }
1167  default:
1168  return nullptr;
1169  }
1170 }
1171 
1172 // We obviously have multiple uses in a clamp since the register is used twice
1173 // in the same instruction.
1174 static bool hasOneNonDBGUseInst(const MachineRegisterInfo &MRI, unsigned Reg) {
1175  int Count = 0;
1176  for (auto I = MRI.use_instr_nodbg_begin(Reg), E = MRI.use_instr_nodbg_end();
1177  I != E; ++I) {
1178  if (++Count > 1)
1179  return false;
1180  }
1181 
1182  return true;
1183 }
1184 
1185 // FIXME: Clamp for v_mad_mixhi_f16 handled during isel.
1186 bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) {
1187  const MachineOperand *ClampSrc = isClamp(MI);
1188  if (!ClampSrc || !hasOneNonDBGUseInst(*MRI, ClampSrc->getReg()))
1189  return false;
1190 
1191  MachineInstr *Def = MRI->getVRegDef(ClampSrc->getReg());
1192 
1193  // The type of clamp must be compatible.
1194  if (TII->getClampMask(*Def) != TII->getClampMask(MI))
1195  return false;
1196 
1197  MachineOperand *DefClamp = TII->getNamedOperand(*Def, AMDGPU::OpName::clamp);
1198  if (!DefClamp)
1199  return false;
1200 
1201  LLVM_DEBUG(dbgs() << "Folding clamp " << *DefClamp << " into " << *Def
1202  << '\n');
1203 
1204  // Clamp is applied after omod, so it is OK if omod is set.
1205  DefClamp->setImm(1);
1206  MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
1207  MI.eraseFromParent();
1208  return true;
1209 }
1210 
1211 static int getOModValue(unsigned Opc, int64_t Val) {
1212  switch (Opc) {
1213  case AMDGPU::V_MUL_F32_e64: {
1214  switch (static_cast<uint32_t>(Val)) {
1215  case 0x3f000000: // 0.5
1216  return SIOutMods::DIV2;
1217  case 0x40000000: // 2.0
1218  return SIOutMods::MUL2;
1219  case 0x40800000: // 4.0
1220  return SIOutMods::MUL4;
1221  default:
1222  return SIOutMods::NONE;
1223  }
1224  }
1225  case AMDGPU::V_MUL_F16_e64: {
1226  switch (static_cast<uint16_t>(Val)) {
1227  case 0x3800: // 0.5
1228  return SIOutMods::DIV2;
1229  case 0x4000: // 2.0
1230  return SIOutMods::MUL2;
1231  case 0x4400: // 4.0
1232  return SIOutMods::MUL4;
1233  default:
1234  return SIOutMods::NONE;
1235  }
1236  }
1237  default:
1238  llvm_unreachable("invalid mul opcode");
1239  }
1240 }
1241 
1242 // FIXME: Does this really not support denormals with f16?
1243 // FIXME: Does this need to check IEEE mode bit? SNaNs are generally not
1244 // handled, so will anything other than that break?
1245 std::pair<const MachineOperand *, int>
1246 SIFoldOperands::isOMod(const MachineInstr &MI) const {
1247  unsigned Op = MI.getOpcode();
1248  switch (Op) {
1249  case AMDGPU::V_MUL_F32_e64:
1250  case AMDGPU::V_MUL_F16_e64: {
1251  // If output denormals are enabled, omod is ignored.
1252  if ((Op == AMDGPU::V_MUL_F32_e64 && ST->hasFP32Denormals()) ||
1253  (Op == AMDGPU::V_MUL_F16_e64 && ST->hasFP16Denormals()))
1254  return std::make_pair(nullptr, SIOutMods::NONE);
1255 
1256  const MachineOperand *RegOp = nullptr;
1257  const MachineOperand *ImmOp = nullptr;
1258  const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1259  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
1260  if (Src0->isImm()) {
1261  ImmOp = Src0;
1262  RegOp = Src1;
1263  } else if (Src1->isImm()) {
1264  ImmOp = Src1;
1265  RegOp = Src0;
1266  } else
1267  return std::make_pair(nullptr, SIOutMods::NONE);
1268 
1269  int OMod = getOModValue(Op, ImmOp->getImm());
1270  if (OMod == SIOutMods::NONE ||
1271  TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
1272  TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
1273  TII->hasModifiersSet(MI, AMDGPU::OpName::omod) ||
1274  TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))
1275  return std::make_pair(nullptr, SIOutMods::NONE);
1276 
1277  return std::make_pair(RegOp, OMod);
1278  }
1279  case AMDGPU::V_ADD_F32_e64:
1280  case AMDGPU::V_ADD_F16_e64: {
1281  // If output denormals are enabled, omod is ignored.
1282  if ((Op == AMDGPU::V_ADD_F32_e64 && ST->hasFP32Denormals()) ||
1283  (Op == AMDGPU::V_ADD_F16_e64 && ST->hasFP16Denormals()))
1284  return std::make_pair(nullptr, SIOutMods::NONE);
1285 
1286  // Look through the DAGCombiner canonicalization fmul x, 2 -> fadd x, x
1287  const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1288  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
1289 
1290  if (Src0->isReg() && Src1->isReg() && Src0->getReg() == Src1->getReg() &&
1291  Src0->getSubReg() == Src1->getSubReg() &&
1292  !TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) &&
1293  !TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) &&
1294  !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) &&
1295  !TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
1296  return std::make_pair(Src0, SIOutMods::MUL2);
1297 
1298  return std::make_pair(nullptr, SIOutMods::NONE);
1299  }
1300  default:
1301  return std::make_pair(nullptr, SIOutMods::NONE);
1302  }
1303 }
1304 
1305 // FIXME: Does this need to check IEEE bit on function?
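// A schematic example of the rewrite performed below (the trailing operand
// pair is clamp, omod; the 2.0 multiplier selects SIOutMods::MUL2):
//   %0 = V_ADD_F32_e64 0, %x, 0, %y, 0, 0
//   %1 = V_MUL_F32_e64 0, 2.0, 0, %0, 0, 0
// =>
//   %0 = V_ADD_F32_e64 0, %x, 0, %y, 0, 1   (all uses of %1 rewritten to %0)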
1306 bool SIFoldOperands::tryFoldOMod(MachineInstr &MI) {
1307  const MachineOperand *RegOp;
1308  int OMod;
1309  std::tie(RegOp, OMod) = isOMod(MI);
1310  if (OMod == SIOutMods::NONE || !RegOp->isReg() ||
1311  RegOp->getSubReg() != AMDGPU::NoSubRegister ||
1312  !hasOneNonDBGUseInst(*MRI, RegOp->getReg()))
1313  return false;
1314 
1315  MachineInstr *Def = MRI->getVRegDef(RegOp->getReg());
1316  MachineOperand *DefOMod = TII->getNamedOperand(*Def, AMDGPU::OpName::omod);
1317  if (!DefOMod || DefOMod->getImm() != SIOutMods::NONE)
1318  return false;
1319 
1320  // Clamp is applied after omod. If the source already has clamp set, don't
1321  // fold it.
1322  if (TII->hasModifiersSet(*Def, AMDGPU::OpName::clamp))
1323  return false;
1324 
1325  LLVM_DEBUG(dbgs() << "Folding omod " << MI << " into " << *Def << '\n');
1326 
1327  DefOMod->setImm(OMod);
1328  MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
1329  MI.eraseFromParent();
1330  return true;
1331 }
1332 
1333 bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
1334  if (skipFunction(MF.getFunction()))
1335  return false;
1336 
1337  MRI = &MF.getRegInfo();
1338  ST = &MF.getSubtarget<GCNSubtarget>();
1339  TII = ST->getInstrInfo();
1340  TRI = &TII->getRegisterInfo();
1341  MFI = MF.getInfo<SIMachineFunctionInfo>();
1342 
1343  // omod is ignored by hardware if IEEE bit is enabled. omod also does not
1344  // correctly handle signed zeros.
1345  //
1346  // FIXME: Also need to check strictfp
1347  bool IsIEEEMode = MFI->getMode().IEEE;
1348  bool HasNSZ = MFI->hasNoSignedZerosFPMath();
1349 
1350  for (MachineBasicBlock *MBB : depth_first(&MF)) {
1351  MachineBasicBlock::iterator I, Next;
1352 
1353  MachineOperand *CurrentKnownM0Val = nullptr;
1354  for (I = MBB->begin(); I != MBB->end(); I = Next) {
1355  Next = std::next(I);
1356  MachineInstr &MI = *I;
1357 
1358  tryFoldInst(TII, &MI);
1359 
1360  if (!TII->isFoldableCopy(MI)) {
1361  // TODO: Omod might be OK if there is NSZ only on the source
1362  // instruction, and not the omod multiply.
1363  if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) ||
1364  !tryFoldOMod(MI))
1365  tryFoldClamp(MI);
1366 
1367  // Saw an unknown clobber of m0, so we no longer know what it is.
1368  if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI))
1369  CurrentKnownM0Val = nullptr;
1370  continue;
1371  }
1372 
1373  // Specially track simple redefs of m0 to the same value in a block, so we
1374  // can erase the later ones.
1375  if (MI.getOperand(0).getReg() == AMDGPU::M0) {
1376  MachineOperand &NewM0Val = MI.getOperand(1);
1377  if (CurrentKnownM0Val && CurrentKnownM0Val->isIdenticalTo(NewM0Val)) {
1378  MI.eraseFromParent();
1379  continue;
1380  }
1381 
1382  // We aren't tracking other physical registers
1383  CurrentKnownM0Val = (NewM0Val.isReg() && NewM0Val.getReg().isPhysical()) ?
1384  nullptr : &NewM0Val;
1385  continue;
1386  }
1387 
1388  MachineOperand &OpToFold = MI.getOperand(1);
1389  bool FoldingImm =
1390  OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
1391 
1392  // FIXME: We could also be folding things like TargetIndexes.
1393  if (!FoldingImm && !OpToFold.isReg())
1394  continue;
1395 
1396  if (OpToFold.isReg() && !Register::isVirtualRegister(OpToFold.getReg()))
1397  continue;
1398 
1399  // Prevent folding operands backwards in the function. For example,
1400  // the COPY opcode must not be replaced by 1 in this example:
1401  //
1402  // %3 = COPY %vgpr0; VGPR_32:%3
1403  // ...
1404  // %vgpr0 = V_MOV_B32_e32 1, implicit %exec
1405  MachineOperand &Dst = MI.getOperand(0);
1406  if (Dst.isReg() && !Register::isVirtualRegister(Dst.getReg()))
1407  continue;
1408 
1409  foldInstOperand(MI, OpToFold);
1410  }
1411  }
1412  return false;
1413 }