LLVM  8.0.0svn
SIFoldOperands.cpp
Go to the documentation of this file.
1 //===-- SIFoldOperands.cpp - Fold operands --- ----------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 /// \file
9 //===----------------------------------------------------------------------===//
10 //
11 
12 #include "AMDGPU.h"
13 #include "AMDGPUSubtarget.h"
14 #include "SIInstrInfo.h"
15 #include "SIMachineFunctionInfo.h"
22 #include "llvm/Support/Debug.h"
25 
26 #define DEBUG_TYPE "si-fold-operands"
27 using namespace llvm;
28 
29 namespace {
30 
31 struct FoldCandidate {
33  union {
34  MachineOperand *OpToFold;
35  uint64_t ImmToFold;
36  int FrameIndexToFold;
37  };
38  int ShrinkOpcode;
39  unsigned char UseOpNo;
41  bool Commuted;
42 
43  FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp,
44  bool Commuted_ = false,
45  int ShrinkOp = -1) :
46  UseMI(MI), OpToFold(nullptr), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
47  Kind(FoldOp->getType()),
48  Commuted(Commuted_) {
49  if (FoldOp->isImm()) {
50  ImmToFold = FoldOp->getImm();
51  } else if (FoldOp->isFI()) {
52  FrameIndexToFold = FoldOp->getIndex();
53  } else {
54  assert(FoldOp->isReg());
55  OpToFold = FoldOp;
56  }
57  }
58 
59  bool isFI() const {
60  return Kind == MachineOperand::MO_FrameIndex;
61  }
62 
63  bool isImm() const {
64  return Kind == MachineOperand::MO_Immediate;
65  }
66 
67  bool isReg() const {
68  return Kind == MachineOperand::MO_Register;
69  }
70 
71  bool isCommuted() const {
72  return Commuted;
73  }
74 
75  bool needsShrink() const {
76  return ShrinkOpcode != -1;
77  }
78 
79  int getShrinkOpcode() const {
80  return ShrinkOpcode;
81  }
82 };
83 
84 class SIFoldOperands : public MachineFunctionPass {
85 public:
86  static char ID;
88  const SIInstrInfo *TII;
89  const SIRegisterInfo *TRI;
90  const GCNSubtarget *ST;
91 
92  void foldOperand(MachineOperand &OpToFold,
94  unsigned UseOpIdx,
96  SmallVectorImpl<MachineInstr *> &CopiesToReplace) const;
97 
98  void foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const;
99 
100  const MachineOperand *isClamp(const MachineInstr &MI) const;
101  bool tryFoldClamp(MachineInstr &MI);
102 
103  std::pair<const MachineOperand *, int> isOMod(const MachineInstr &MI) const;
104  bool tryFoldOMod(MachineInstr &MI);
105 
106 public:
107  SIFoldOperands() : MachineFunctionPass(ID) {
109  }
110 
111  bool runOnMachineFunction(MachineFunction &MF) override;
112 
113  StringRef getPassName() const override { return "SI Fold Operands"; }
114 
115  void getAnalysisUsage(AnalysisUsage &AU) const override {
116  AU.setPreservesCFG();
118  }
119 };
120 
121 } // End anonymous namespace.
122 
123 INITIALIZE_PASS(SIFoldOperands, DEBUG_TYPE,
124  "SI Fold Operands", false, false)
125 
126 char SIFoldOperands::ID = 0;
127 
129 
130 // Wrapper around isInlineConstant that understands special cases when
131 // instruction types are replaced during operand folding.
133  const MachineInstr &UseMI,
134  unsigned OpNo,
135  const MachineOperand &OpToFold) {
136  if (TII->isInlineConstant(UseMI, OpNo, OpToFold))
137  return true;
138 
139  unsigned Opc = UseMI.getOpcode();
140  switch (Opc) {
141  case AMDGPU::V_MAC_F32_e64:
142  case AMDGPU::V_MAC_F16_e64:
143  case AMDGPU::V_FMAC_F32_e64: {
144  // Special case for mac. Since this is replaced with mad when folded into
145  // src2, we need to check the legality for the final instruction.
146  int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
147  if (static_cast<int>(OpNo) == Src2Idx) {
148  bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64;
149  bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64;
150 
151  unsigned Opc = IsFMA ?
152  AMDGPU::V_FMA_F32 : (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
153  const MCInstrDesc &MadDesc = TII->get(Opc);
154  return TII->isInlineConstant(OpToFold, MadDesc.OpInfo[OpNo].OperandType);
155  }
156  return false;
157  }
158  default:
159  return false;
160  }
161 }
162 
164  return new SIFoldOperands();
165 }
166 
167 static bool updateOperand(FoldCandidate &Fold,
168  const SIInstrInfo &TII,
169  const TargetRegisterInfo &TRI) {
170  MachineInstr *MI = Fold.UseMI;
171  MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
172  assert(Old.isReg());
173 
174  if (Fold.isImm()) {
175  if (MI->getDesc().TSFlags & SIInstrFlags::IsPacked) {
176  // Set op_sel/op_sel_hi on this operand or bail out if op_sel is
177  // already set.
178  unsigned Opcode = MI->getOpcode();
179  int OpNo = MI->getOperandNo(&Old);
180  int ModIdx = -1;
181  if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0))
182  ModIdx = AMDGPU::OpName::src0_modifiers;
183  else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1))
184  ModIdx = AMDGPU::OpName::src1_modifiers;
185  else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2))
186  ModIdx = AMDGPU::OpName::src2_modifiers;
187  assert(ModIdx != -1);
188  ModIdx = AMDGPU::getNamedOperandIdx(Opcode, ModIdx);
189  MachineOperand &Mod = MI->getOperand(ModIdx);
190  unsigned Val = Mod.getImm();
191  if ((Val & SISrcMods::OP_SEL_0) || !(Val & SISrcMods::OP_SEL_1))
192  return false;
193  // If upper part is all zero we do not need op_sel_hi.
194  if (!isUInt<16>(Fold.ImmToFold)) {
195  if (!(Fold.ImmToFold & 0xffff)) {
196  Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0);
197  Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
198  Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff);
199  return true;
200  }
201  Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
202  }
203  }
204 
205  if (Fold.needsShrink()) {
206  MachineBasicBlock *MBB = MI->getParent();
207  auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI);
208  if (Liveness != MachineBasicBlock::LQR_Dead)
209  return false;
210 
212  int Op32 = Fold.getShrinkOpcode();
213  MachineOperand &Dst0 = MI->getOperand(0);
214  MachineOperand &Dst1 = MI->getOperand(1);
215  assert(Dst0.isDef() && Dst1.isDef());
216 
217  bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg());
218 
219  const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
220  unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
221  const TargetRegisterClass *Dst1RC = MRI.getRegClass(Dst1.getReg());
222  unsigned NewReg1 = MRI.createVirtualRegister(Dst1RC);
223 
224  MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
225 
226  if (HaveNonDbgCarryUse) {
227  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg())
228  .addReg(AMDGPU::VCC, RegState::Kill);
229  }
230 
231  // Keep the old instruction around to avoid breaking iterators, but
232  // replace the outputs with dummy registers.
233  Dst0.setReg(NewReg0);
234  Dst1.setReg(NewReg1);
235 
236  if (Fold.isCommuted())
237  TII.commuteInstruction(*Inst32, false);
238  return true;
239  }
240 
241  Old.ChangeToImmediate(Fold.ImmToFold);
242  return true;
243  }
244 
245  assert(!Fold.needsShrink() && "not handled");
246 
247  if (Fold.isFI()) {
248  Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
249  return true;
250  }
251 
252  MachineOperand *New = Fold.OpToFold;
255  Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);
256 
257  Old.setIsUndef(New->isUndef());
258  return true;
259  }
260 
261  // FIXME: Handle physical registers.
262 
263  return false;
264 }
265 
267  const MachineInstr *MI) {
268  for (auto Candidate : FoldList) {
269  if (Candidate.UseMI == MI)
270  return true;
271  }
272  return false;
273 }
274 
276  MachineInstr *MI, unsigned OpNo,
277  MachineOperand *OpToFold,
278  const SIInstrInfo *TII) {
279  if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {
280 
281  // Special case for v_mac_{f16, f32}_e64 if we are trying to fold into src2
282  unsigned Opc = MI->getOpcode();
283  if ((Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
284  Opc == AMDGPU::V_FMAC_F32_e64) &&
285  (int)OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)) {
286  bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64;
287  bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64;
288  unsigned NewOpc = IsFMA ?
289  AMDGPU::V_FMA_F32 : (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
290 
291  // Check if changing this to a v_mad_{f16, f32} instruction will allow us
292  // to fold the operand.
293  MI->setDesc(TII->get(NewOpc));
294  bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold, TII);
295  if (FoldAsMAD) {
296  MI->untieRegOperand(OpNo);
297  return true;
298  }
299  MI->setDesc(TII->get(Opc));
300  }
301 
302  // Special case for s_setreg_b32
303  if (Opc == AMDGPU::S_SETREG_B32 && OpToFold->isImm()) {
304  MI->setDesc(TII->get(AMDGPU::S_SETREG_IMM32_B32));
305  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
306  return true;
307  }
308 
309  // If we are already folding into another operand of MI, then
310  // we can't commute the instruction, otherwise we risk making the
311  // other fold illegal.
312  if (isUseMIInFoldList(FoldList, MI))
313  return false;
314 
315  unsigned CommuteOpNo = OpNo;
316 
317  // Operand is not legal, so try to commute the instruction to
318  // see if this makes it possible to fold.
319  unsigned CommuteIdx0 = TargetInstrInfo::CommuteAnyOperandIndex;
320  unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
321  bool CanCommute = TII->findCommutedOpIndices(*MI, CommuteIdx0, CommuteIdx1);
322 
323  if (CanCommute) {
324  if (CommuteIdx0 == OpNo)
325  CommuteOpNo = CommuteIdx1;
326  else if (CommuteIdx1 == OpNo)
327  CommuteOpNo = CommuteIdx0;
328  }
329 
330 
331  // One of the operands might be an Imm operand, and OpNo may refer to it after
332  // the call of commuteInstruction() below. Such situations are avoided
333  // here explicitly as OpNo must be a register operand to be a candidate
334  // for memory folding.
335  if (CanCommute && (!MI->getOperand(CommuteIdx0).isReg() ||
336  !MI->getOperand(CommuteIdx1).isReg()))
337  return false;
338 
339  if (!CanCommute ||
340  !TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1))
341  return false;
342 
343  if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) {
344  if ((Opc == AMDGPU::V_ADD_I32_e64 ||
345  Opc == AMDGPU::V_SUB_I32_e64 ||
346  Opc == AMDGPU::V_SUBREV_I32_e64) && // FIXME
347  OpToFold->isImm()) {
349 
350  // Verify the other operand is a VGPR, otherwise we would violate the
351  // constant bus restriction.
352  unsigned OtherIdx = CommuteOpNo == CommuteIdx0 ? CommuteIdx1 : CommuteIdx0;
353  MachineOperand &OtherOp = MI->getOperand(OtherIdx);
354  if (!OtherOp.isReg() ||
355  !TII->getRegisterInfo().isVGPR(MRI, OtherOp.getReg()))
356  return false;
357 
358  assert(MI->getOperand(1).isDef());
359 
360  int Op32 = AMDGPU::getVOPe32(Opc);
361  FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true,
362  Op32));
363  return true;
364  }
365 
366  TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1);
367  return false;
368  }
369 
370  FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true));
371  return true;
372  }
373 
374  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
375  return true;
376 }
377 
378 // If the use operand doesn't care about the value, this may be an operand only
379 // used for register indexing, in which case it is unsafe to fold.
380 static bool isUseSafeToFold(const SIInstrInfo *TII,
381  const MachineInstr &MI,
382  const MachineOperand &UseMO) {
383  return !UseMO.isUndef() && !TII->isSDWA(MI);
384  //return !MI.hasRegisterImplicitUseOperand(UseMO.getReg());
385 }
386 
387 void SIFoldOperands::foldOperand(
388  MachineOperand &OpToFold,
390  unsigned UseOpIdx,
392  SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
393  const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
394 
395  if (!isUseSafeToFold(TII, *UseMI, UseOp))
396  return;
397 
398  // FIXME: Fold operands with subregs.
399  if (UseOp.isReg() && OpToFold.isReg()) {
400  if (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister)
401  return;
402 
403  // Don't fold subregister extracts into tied operands, only if it is a full
404  // copy since a subregister use tied to a full register def doesn't really
405  // make sense. e.g. don't fold:
406  //
407  // %1 = COPY %0:sub1
408  // %2<tied3> = V_MAC_{F16, F32} %3, %4, %1<tied0>
409  //
410  // into
411  // %2<tied3> = V_MAC_{F16, F32} %3, %4, %0:sub1<tied0>
412  if (UseOp.isTied() && OpToFold.getSubReg() != AMDGPU::NoSubRegister)
413  return;
414  }
415 
416  // Special case for REG_SEQUENCE: We can't fold literals into
417  // REG_SEQUENCE instructions, so we have to fold them into the
418  // uses of REG_SEQUENCE.
419  if (UseMI->isRegSequence()) {
420  unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
421  unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
422 
424  RSUse = MRI->use_begin(RegSeqDstReg), RSE = MRI->use_end();
425  RSUse != RSE; ++RSUse) {
426 
427  MachineInstr *RSUseMI = RSUse->getParent();
428  if (RSUse->getSubReg() != RegSeqDstSubReg)
429  continue;
430 
431  foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
432  CopiesToReplace);
433  }
434 
435  return;
436  }
437 
438 
439  bool FoldingImm = OpToFold.isImm();
440 
441  if (FoldingImm && UseMI->isCopy()) {
442  unsigned DestReg = UseMI->getOperand(0).getReg();
443  const TargetRegisterClass *DestRC
445  MRI->getRegClass(DestReg) :
446  TRI->getPhysRegClass(DestReg);
447 
448  unsigned SrcReg = UseMI->getOperand(1).getReg();
451  const TargetRegisterClass * SrcRC = MRI->getRegClass(SrcReg);
452  if (TRI->isSGPRClass(SrcRC) && TRI->hasVGPRs(DestRC)) {
456  Use = MRI->use_begin(DestReg), E = MRI->use_end();
457  Use != E; Use = NextUse) {
458  NextUse = std::next(Use);
459  FoldCandidate FC = FoldCandidate(Use->getParent(),
460  Use.getOperandNo(), &UseMI->getOperand(1));
461  CopyUses.push_back(FC);
462  }
463  for (auto & F : CopyUses) {
464  foldOperand(*F.OpToFold, F.UseMI, F.UseOpNo,
465  FoldList, CopiesToReplace);
466  }
467  }
468  }
469 
470  // In order to fold immediates into copies, we need to change the
471  // copy to a MOV.
472 
473  unsigned MovOp = TII->getMovOpcode(DestRC);
474  if (MovOp == AMDGPU::COPY)
475  return;
476 
477  UseMI->setDesc(TII->get(MovOp));
478  CopiesToReplace.push_back(UseMI);
479  } else {
480  if (UseMI->isCopy() && OpToFold.isReg() &&
483  TRI->isVGPR(*MRI, UseMI->getOperand(0).getReg()) &&
484  TRI->isVGPR(*MRI, UseMI->getOperand(1).getReg()) &&
485  !UseMI->getOperand(1).getSubReg()) {
486  UseMI->getOperand(1).setReg(OpToFold.getReg());
487  UseMI->getOperand(1).setSubReg(OpToFold.getSubReg());
488  UseMI->getOperand(1).setIsKill(false);
489  CopiesToReplace.push_back(UseMI);
490  OpToFold.setIsKill(false);
491  return;
492  }
493 
494  const MCInstrDesc &UseDesc = UseMI->getDesc();
495 
496  // Don't fold into target independent nodes. Target independent opcodes
497  // don't have defined register classes.
498  if (UseDesc.isVariadic() ||
499  UseOp.isImplicit() ||
500  UseDesc.OpInfo[UseOpIdx].RegClass == -1)
501  return;
502  }
503 
504  if (!FoldingImm) {
505  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
506 
507  // FIXME: We could try to change the instruction from 64-bit to 32-bit
508  // to enable more folding opportunities. The shrink operands pass
509  // already does this.
510  return;
511  }
512 
513 
514  const MCInstrDesc &FoldDesc = OpToFold.getParent()->getDesc();
515  const TargetRegisterClass *FoldRC =
516  TRI->getRegClass(FoldDesc.OpInfo[0].RegClass);
517 
518 
519  // Split 64-bit constants into 32-bits for folding.
520  if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) {
521  unsigned UseReg = UseOp.getReg();
522  const TargetRegisterClass *UseRC
524  MRI->getRegClass(UseReg) :
525  TRI->getPhysRegClass(UseReg);
526 
527  if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64)
528  return;
529 
530  APInt Imm(64, OpToFold.getImm());
531  if (UseOp.getSubReg() == AMDGPU::sub0) {
532  Imm = Imm.getLoBits(32);
533  } else {
534  assert(UseOp.getSubReg() == AMDGPU::sub1);
535  Imm = Imm.getHiBits(32);
536  }
537 
538  MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
539  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
540  return;
541  }
542 
543 
544 
545  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
546 }
547 
548 static bool evalBinaryInstruction(unsigned Opcode, int32_t &Result,
549  uint32_t LHS, uint32_t RHS) {
550  switch (Opcode) {
551  case AMDGPU::V_AND_B32_e64:
552  case AMDGPU::V_AND_B32_e32:
553  case AMDGPU::S_AND_B32:
554  Result = LHS & RHS;
555  return true;
556  case AMDGPU::V_OR_B32_e64:
557  case AMDGPU::V_OR_B32_e32:
558  case AMDGPU::S_OR_B32:
559  Result = LHS | RHS;
560  return true;
561  case AMDGPU::V_XOR_B32_e64:
562  case AMDGPU::V_XOR_B32_e32:
563  case AMDGPU::S_XOR_B32:
564  Result = LHS ^ RHS;
565  return true;
566  case AMDGPU::V_LSHL_B32_e64:
567  case AMDGPU::V_LSHL_B32_e32:
568  case AMDGPU::S_LSHL_B32:
569  // The instruction ignores the high bits for out of bounds shifts.
570  Result = LHS << (RHS & 31);
571  return true;
572  case AMDGPU::V_LSHLREV_B32_e64:
573  case AMDGPU::V_LSHLREV_B32_e32:
574  Result = RHS << (LHS & 31);
575  return true;
576  case AMDGPU::V_LSHR_B32_e64:
577  case AMDGPU::V_LSHR_B32_e32:
578  case AMDGPU::S_LSHR_B32:
579  Result = LHS >> (RHS & 31);
580  return true;
581  case AMDGPU::V_LSHRREV_B32_e64:
582  case AMDGPU::V_LSHRREV_B32_e32:
583  Result = RHS >> (LHS & 31);
584  return true;
585  case AMDGPU::V_ASHR_I32_e64:
586  case AMDGPU::V_ASHR_I32_e32:
587  case AMDGPU::S_ASHR_I32:
588  Result = static_cast<int32_t>(LHS) >> (RHS & 31);
589  return true;
590  case AMDGPU::V_ASHRREV_I32_e64:
591  case AMDGPU::V_ASHRREV_I32_e32:
592  Result = static_cast<int32_t>(RHS) >> (LHS & 31);
593  return true;
594  default:
595  return false;
596  }
597 }
598 
599 static unsigned getMovOpc(bool IsScalar) {
600  return IsScalar ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
601 }
602 
603 /// Remove any leftover implicit operands from mutating the instruction. e.g.
604 /// if we replace an s_and_b32 with a copy, we don't need the implicit scc def
605 /// anymore.
607  const MCInstrDesc &Desc = MI.getDesc();
608  unsigned NumOps = Desc.getNumOperands() +
609  Desc.getNumImplicitUses() +
610  Desc.getNumImplicitDefs();
611 
612  for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
613  MI.RemoveOperand(I);
614 }
615 
616 static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) {
617  MI.setDesc(NewDesc);
619 }
620 
622  MachineOperand &Op) {
623  if (Op.isReg()) {
624  // If this has a subregister, it obviously is a register source.
625  if (Op.getSubReg() != AMDGPU::NoSubRegister ||
627  return &Op;
628 
629  MachineInstr *Def = MRI.getVRegDef(Op.getReg());
630  if (Def && Def->isMoveImmediate()) {
631  MachineOperand &ImmSrc = Def->getOperand(1);
632  if (ImmSrc.isImm())
633  return &ImmSrc;
634  }
635  }
636 
637  return &Op;
638 }
639 
640 // Try to simplify operations with a constant that may appear after instruction
641 // selection.
642 // TODO: See if a frame index with a fixed offset can fold.
644  const SIInstrInfo *TII,
645  MachineInstr *MI,
646  MachineOperand *ImmOp) {
647  unsigned Opc = MI->getOpcode();
648  if (Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
649  Opc == AMDGPU::S_NOT_B32) {
650  MI->getOperand(1).ChangeToImmediate(~ImmOp->getImm());
651  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_NOT_B32)));
652  return true;
653  }
654 
655  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
656  if (Src1Idx == -1)
657  return false;
658 
659  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
660  MachineOperand *Src0 = getImmOrMaterializedImm(MRI, MI->getOperand(Src0Idx));
661  MachineOperand *Src1 = getImmOrMaterializedImm(MRI, MI->getOperand(Src1Idx));
662 
663  if (!Src0->isImm() && !Src1->isImm())
664  return false;
665 
666  if (MI->getOpcode() == AMDGPU::V_LSHL_OR_B32) {
667  if (Src0->isImm() && Src0->getImm() == 0) {
668  // v_lshl_or_b32 0, X, Y -> copy Y
669  // v_lshl_or_b32 0, X, K -> v_mov_b32 K
670  bool UseCopy = TII->getNamedOperand(*MI, AMDGPU::OpName::src2)->isReg();
671  MI->RemoveOperand(Src1Idx);
672  MI->RemoveOperand(Src0Idx);
673 
674  MI->setDesc(TII->get(UseCopy ? AMDGPU::COPY : AMDGPU::V_MOV_B32_e32));
675  return true;
676  }
677  }
678 
679  // and k0, k1 -> v_mov_b32 (k0 & k1)
680  // or k0, k1 -> v_mov_b32 (k0 | k1)
681  // xor k0, k1 -> v_mov_b32 (k0 ^ k1)
682  if (Src0->isImm() && Src1->isImm()) {
683  int32_t NewImm;
684  if (!evalBinaryInstruction(Opc, NewImm, Src0->getImm(), Src1->getImm()))
685  return false;
686 
687  const SIRegisterInfo &TRI = TII->getRegisterInfo();
688  bool IsSGPR = TRI.isSGPRReg(MRI, MI->getOperand(0).getReg());
689 
690  // Be careful to change the right operand, src0 may belong to a different
691  // instruction.
692  MI->getOperand(Src0Idx).ChangeToImmediate(NewImm);
693  MI->RemoveOperand(Src1Idx);
694  mutateCopyOp(*MI, TII->get(getMovOpc(IsSGPR)));
695  return true;
696  }
697 
698  if (!MI->isCommutable())
699  return false;
700 
701  if (Src0->isImm() && !Src1->isImm()) {
702  std::swap(Src0, Src1);
703  std::swap(Src0Idx, Src1Idx);
704  }
705 
706  int32_t Src1Val = static_cast<int32_t>(Src1->getImm());
707  if (Opc == AMDGPU::V_OR_B32_e64 ||
708  Opc == AMDGPU::V_OR_B32_e32 ||
709  Opc == AMDGPU::S_OR_B32) {
710  if (Src1Val == 0) {
711  // y = or x, 0 => y = copy x
712  MI->RemoveOperand(Src1Idx);
713  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
714  } else if (Src1Val == -1) {
715  // y = or x, -1 => y = v_mov_b32 -1
716  MI->RemoveOperand(Src1Idx);
717  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_OR_B32)));
718  } else
719  return false;
720 
721  return true;
722  }
723 
724  if (MI->getOpcode() == AMDGPU::V_AND_B32_e64 ||
725  MI->getOpcode() == AMDGPU::V_AND_B32_e32 ||
726  MI->getOpcode() == AMDGPU::S_AND_B32) {
727  if (Src1Val == 0) {
728  // y = and x, 0 => y = v_mov_b32 0
729  MI->RemoveOperand(Src0Idx);
730  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_AND_B32)));
731  } else if (Src1Val == -1) {
732  // y = and x, -1 => y = copy x
733  MI->RemoveOperand(Src1Idx);
734  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
736  } else
737  return false;
738 
739  return true;
740  }
741 
742  if (MI->getOpcode() == AMDGPU::V_XOR_B32_e64 ||
743  MI->getOpcode() == AMDGPU::V_XOR_B32_e32 ||
744  MI->getOpcode() == AMDGPU::S_XOR_B32) {
745  if (Src1Val == 0) {
746  // y = xor x, 0 => y = copy x
747  MI->RemoveOperand(Src1Idx);
748  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
749  return true;
750  }
751  }
752 
753  return false;
754 }
755 
756 // Try to fold an instruction into a simpler one
757 static bool tryFoldInst(const SIInstrInfo *TII,
758  MachineInstr *MI) {
759  unsigned Opc = MI->getOpcode();
760 
761  if (Opc == AMDGPU::V_CNDMASK_B32_e32 ||
762  Opc == AMDGPU::V_CNDMASK_B32_e64 ||
763  Opc == AMDGPU::V_CNDMASK_B64_PSEUDO) {
764  const MachineOperand *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
765  const MachineOperand *Src1 = TII->getNamedOperand(*MI, AMDGPU::OpName::src1);
766  if (Src1->isIdenticalTo(*Src0)) {
767  LLVM_DEBUG(dbgs() << "Folded " << *MI << " into ");
768  int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
769  if (Src2Idx != -1)
770  MI->RemoveOperand(Src2Idx);
771  MI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1));
772  mutateCopyOp(*MI, TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY
773  : getMovOpc(false)));
774  LLVM_DEBUG(dbgs() << *MI << '\n');
775  return true;
776  }
777  }
778 
779  return false;
780 }
781 
782 void SIFoldOperands::foldInstOperand(MachineInstr &MI,
783  MachineOperand &OpToFold) const {
784  // We need to mutate the operands of new mov instructions to add implicit
785  // uses of EXEC, but adding them invalidates the use_iterator, so defer
786  // this.
787  SmallVector<MachineInstr *, 4> CopiesToReplace;
789  MachineOperand &Dst = MI.getOperand(0);
790 
791  bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
792  if (FoldingImm) {
793  unsigned NumLiteralUses = 0;
794  MachineOperand *NonInlineUse = nullptr;
795  int NonInlineUseOpNo = -1;
796 
799  Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
800  Use != E; Use = NextUse) {
801  NextUse = std::next(Use);
802  MachineInstr *UseMI = Use->getParent();
803  unsigned OpNo = Use.getOperandNo();
804 
805  // Folding the immediate may reveal operations that can be constant
806  // folded or replaced with a copy. This can happen for example after
807  // frame indices are lowered to constants or from splitting 64-bit
808  // constants.
809  //
810  // We may also encounter cases where one or both operands are
811  // immediates materialized into a register, which would ordinarily not
812  // be folded due to multiple uses or operand constraints.
813 
814  if (OpToFold.isImm() && tryConstantFoldOp(*MRI, TII, UseMI, &OpToFold)) {
815  LLVM_DEBUG(dbgs() << "Constant folded " << *UseMI << '\n');
816 
817  // Some constant folding cases change the same immediate's use to a new
818  // instruction, e.g. and x, 0 -> 0. Make sure we re-visit the user
819  // again. The same constant folded instruction could also have a second
820  // use operand.
821  NextUse = MRI->use_begin(Dst.getReg());
822  FoldList.clear();
823  continue;
824  }
825 
826  // Try to fold any inline immediate uses, and then only fold other
827  // constants if they have one use.
828  //
829  // The legality of the inline immediate must be checked based on the use
830  // operand, not the defining instruction, because 32-bit instructions
831  // with 32-bit inline immediate sources may be used to materialize
832  // constants used in 16-bit operands.
833  //
834  // e.g. it is unsafe to fold:
835  // s_mov_b32 s0, 1.0 // materializes 0x3f800000
836  // v_add_f16 v0, v1, s0 // 1.0 f16 inline immediate sees 0x00003c00
837 
838  // Folding immediates with more than one use will increase program size.
839  // FIXME: This will also reduce register usage, which may be better
840  // in some cases. A better heuristic is needed.
841  if (isInlineConstantIfFolded(TII, *UseMI, OpNo, OpToFold)) {
842  foldOperand(OpToFold, UseMI, OpNo, FoldList, CopiesToReplace);
843  } else {
844  if (++NumLiteralUses == 1) {
845  NonInlineUse = &*Use;
846  NonInlineUseOpNo = OpNo;
847  }
848  }
849  }
850 
851  if (NumLiteralUses == 1) {
852  MachineInstr *UseMI = NonInlineUse->getParent();
853  foldOperand(OpToFold, UseMI, NonInlineUseOpNo, FoldList, CopiesToReplace);
854  }
855  } else {
856  // Folding register.
858  Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
859  Use != E; ++Use) {
860  MachineInstr *UseMI = Use->getParent();
861 
862  foldOperand(OpToFold, UseMI, Use.getOperandNo(),
863  FoldList, CopiesToReplace);
864  }
865  }
866 
867  MachineFunction *MF = MI.getParent()->getParent();
868  // Make sure we add EXEC uses to any new v_mov instructions created.
869  for (MachineInstr *Copy : CopiesToReplace)
870  Copy->addImplicitDefUseOperands(*MF);
871 
872  for (FoldCandidate &Fold : FoldList) {
873  if (updateOperand(Fold, *TII, *TRI)) {
874  // Clear kill flags.
875  if (Fold.isReg()) {
876  assert(Fold.OpToFold && Fold.OpToFold->isReg());
877  // FIXME: Probably shouldn't bother trying to fold if not an
878  // SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR
879  // copies.
880  MRI->clearKillFlags(Fold.OpToFold->getReg());
881  }
882  LLVM_DEBUG(dbgs() << "Folded source from " << MI << " into OpNo "
883  << static_cast<int>(Fold.UseOpNo) << " of "
884  << *Fold.UseMI << '\n');
885  tryFoldInst(TII, Fold.UseMI);
886  } else if (Fold.isCommuted()) {
887  // Restoring instruction's original operand order if fold has failed.
888  TII->commuteInstruction(*Fold.UseMI, false);
889  }
890  }
891 }
892 
893 // Clamp patterns are canonically selected to v_max_* instructions, so only
894 // handle them.
895 const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
896  unsigned Op = MI.getOpcode();
897  switch (Op) {
898  case AMDGPU::V_MAX_F32_e64:
899  case AMDGPU::V_MAX_F16_e64:
900  case AMDGPU::V_MAX_F64:
901  case AMDGPU::V_PK_MAX_F16: {
902  if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())
903  return nullptr;
904 
905  // Make sure sources are identical.
906  const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
907  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
908  if (!Src0->isReg() || !Src1->isReg() ||
909  Src0->getReg() != Src1->getReg() ||
910  Src0->getSubReg() != Src1->getSubReg() ||
911  Src0->getSubReg() != AMDGPU::NoSubRegister)
912  return nullptr;
913 
914  // Can't fold up if we have modifiers.
915  if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
916  return nullptr;
917 
918  unsigned Src0Mods
919  = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm();
920  unsigned Src1Mods
921  = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm();
922 
923  // Having a 0 op_sel_hi would require swizzling the output in the source
924  // instruction, which we can't do.
925  unsigned UnsetMods = (Op == AMDGPU::V_PK_MAX_F16) ? SISrcMods::OP_SEL_1 : 0;
926  if (Src0Mods != UnsetMods && Src1Mods != UnsetMods)
927  return nullptr;
928  return Src0;
929  }
930  default:
931  return nullptr;
932  }
933 }
934 
935 // We obviously have multiple uses in a clamp since the register is used twice
936 // in the same instruction.
937 static bool hasOneNonDBGUseInst(const MachineRegisterInfo &MRI, unsigned Reg) {
938  int Count = 0;
939  for (auto I = MRI.use_instr_nodbg_begin(Reg), E = MRI.use_instr_nodbg_end();
940  I != E; ++I) {
941  if (++Count > 1)
942  return false;
943  }
944 
945  return true;
946 }
947 
948 // FIXME: Clamp for v_mad_mixhi_f16 handled during isel.
949 bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) {
950  const MachineOperand *ClampSrc = isClamp(MI);
951  if (!ClampSrc || !hasOneNonDBGUseInst(*MRI, ClampSrc->getReg()))
952  return false;
953 
954  MachineInstr *Def = MRI->getVRegDef(ClampSrc->getReg());
955 
956  // The type of clamp must be compatible.
957  if (TII->getClampMask(*Def) != TII->getClampMask(MI))
958  return false;
959 
960  MachineOperand *DefClamp = TII->getNamedOperand(*Def, AMDGPU::OpName::clamp);
961  if (!DefClamp)
962  return false;
963 
964  LLVM_DEBUG(dbgs() << "Folding clamp " << *DefClamp << " into " << *Def
965  << '\n');
966 
967  // Clamp is applied after omod, so it is OK if omod is set.
968  DefClamp->setImm(1);
969  MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
970  MI.eraseFromParent();
971  return true;
972 }
973 
974 static int getOModValue(unsigned Opc, int64_t Val) {
975  switch (Opc) {
976  case AMDGPU::V_MUL_F32_e64: {
977  switch (static_cast<uint32_t>(Val)) {
978  case 0x3f000000: // 0.5
979  return SIOutMods::DIV2;
980  case 0x40000000: // 2.0
981  return SIOutMods::MUL2;
982  case 0x40800000: // 4.0
983  return SIOutMods::MUL4;
984  default:
985  return SIOutMods::NONE;
986  }
987  }
988  case AMDGPU::V_MUL_F16_e64: {
989  switch (static_cast<uint16_t>(Val)) {
990  case 0x3800: // 0.5
991  return SIOutMods::DIV2;
992  case 0x4000: // 2.0
993  return SIOutMods::MUL2;
994  case 0x4400: // 4.0
995  return SIOutMods::MUL4;
996  default:
997  return SIOutMods::NONE;
998  }
999  }
1000  default:
1001  llvm_unreachable("invalid mul opcode");
1002  }
1003 }
1004 
1005 // FIXME: Does this really not support denormals with f16?
1006 // FIXME: Does this need to check IEEE mode bit? SNaNs are generally not
1007 // handled, so will anything other than that break?
1008 std::pair<const MachineOperand *, int>
1009 SIFoldOperands::isOMod(const MachineInstr &MI) const {
1010  unsigned Op = MI.getOpcode();
1011  switch (Op) {
1012  case AMDGPU::V_MUL_F32_e64:
1013  case AMDGPU::V_MUL_F16_e64: {
1014  // If output denormals are enabled, omod is ignored.
1015  if ((Op == AMDGPU::V_MUL_F32_e64 && ST->hasFP32Denormals()) ||
1016  (Op == AMDGPU::V_MUL_F16_e64 && ST->hasFP16Denormals()))
1017  return std::make_pair(nullptr, SIOutMods::NONE);
1018 
1019  const MachineOperand *RegOp = nullptr;
1020  const MachineOperand *ImmOp = nullptr;
1021  const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1022  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
1023  if (Src0->isImm()) {
1024  ImmOp = Src0;
1025  RegOp = Src1;
1026  } else if (Src1->isImm()) {
1027  ImmOp = Src1;
1028  RegOp = Src0;
1029  } else
1030  return std::make_pair(nullptr, SIOutMods::NONE);
1031 
1032  int OMod = getOModValue(Op, ImmOp->getImm());
1033  if (OMod == SIOutMods::NONE ||
1034  TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
1035  TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
1036  TII->hasModifiersSet(MI, AMDGPU::OpName::omod) ||
1037  TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))
1038  return std::make_pair(nullptr, SIOutMods::NONE);
1039 
1040  return std::make_pair(RegOp, OMod);
1041  }
1042  case AMDGPU::V_ADD_F32_e64:
1043  case AMDGPU::V_ADD_F16_e64: {
1044  // If output denormals are enabled, omod is ignored.
1045  if ((Op == AMDGPU::V_ADD_F32_e64 && ST->hasFP32Denormals()) ||
1046  (Op == AMDGPU::V_ADD_F16_e64 && ST->hasFP16Denormals()))
1047  return std::make_pair(nullptr, SIOutMods::NONE);
1048 
1049  // Look through the DAGCombiner canonicalization fmul x, 2 -> fadd x, x
1050  const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1051  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
1052 
1053  if (Src0->isReg() && Src1->isReg() && Src0->getReg() == Src1->getReg() &&
1054  Src0->getSubReg() == Src1->getSubReg() &&
1055  !TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) &&
1056  !TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) &&
1057  !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) &&
1058  !TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
1059  return std::make_pair(Src0, SIOutMods::MUL2);
1060 
1061  return std::make_pair(nullptr, SIOutMods::NONE);
1062  }
1063  default:
1064  return std::make_pair(nullptr, SIOutMods::NONE);
1065  }
1066 }
1067 
1068 // FIXME: Does this need to check IEEE bit on function?
1069 bool SIFoldOperands::tryFoldOMod(MachineInstr &MI) {
1070  const MachineOperand *RegOp;
1071  int OMod;
1072  std::tie(RegOp, OMod) = isOMod(MI);
1073  if (OMod == SIOutMods::NONE || !RegOp->isReg() ||
1074  RegOp->getSubReg() != AMDGPU::NoSubRegister ||
1075  !hasOneNonDBGUseInst(*MRI, RegOp->getReg()))
1076  return false;
1077 
1078  MachineInstr *Def = MRI->getVRegDef(RegOp->getReg());
1079  MachineOperand *DefOMod = TII->getNamedOperand(*Def, AMDGPU::OpName::omod);
1080  if (!DefOMod || DefOMod->getImm() != SIOutMods::NONE)
1081  return false;
1082 
1083  // Clamp is applied after omod. If the source already has clamp set, don't
1084  // fold it.
1085  if (TII->hasModifiersSet(*Def, AMDGPU::OpName::clamp))
1086  return false;
1087 
1088  LLVM_DEBUG(dbgs() << "Folding omod " << MI << " into " << *Def << '\n');
1089 
1090  DefOMod->setImm(OMod);
1091  MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
1092  MI.eraseFromParent();
1093  return true;
1094 }
1095 
1096 bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
1097  if (skipFunction(MF.getFunction()))
1098  return false;
1099 
1100  MRI = &MF.getRegInfo();
1101  ST = &MF.getSubtarget<GCNSubtarget>();
1102  TII = ST->getInstrInfo();
1103  TRI = &TII->getRegisterInfo();
1104 
1106 
1107  // omod is ignored by hardware if IEEE bit is enabled. omod also does not
1108  // correctly handle signed zeros.
1109  //
1110  bool IsIEEEMode = ST->enableIEEEBit(MF);
1111  bool HasNSZ = MFI->hasNoSignedZerosFPMath();
1112 
1113  for (MachineBasicBlock *MBB : depth_first(&MF)) {
1115  for (I = MBB->begin(); I != MBB->end(); I = Next) {
1116  Next = std::next(I);
1117  MachineInstr &MI = *I;
1118 
1119  tryFoldInst(TII, &MI);
1120 
1121  if (!TII->isFoldableCopy(MI)) {
1122  // TODO: Omod might be OK if there is NSZ only on the source
1123  // instruction, and not the omod multiply.
1124  if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) ||
1125  !tryFoldOMod(MI))
1126  tryFoldClamp(MI);
1127  continue;
1128  }
1129 
1130  MachineOperand &OpToFold = MI.getOperand(1);
1131  bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
1132 
1133  // FIXME: We could also be folding things like TargetIndexes.
1134  if (!FoldingImm && !OpToFold.isReg())
1135  continue;
1136 
1137  if (OpToFold.isReg() &&
1139  continue;
1140 
1141  // Prevent folding operands backwards in the function. For example,
1142  // the COPY opcode must not be replaced by 1 in this example:
1143  //
1144  // %3 = COPY %vgpr0; VGPR_32:%3
1145  // ...
1146  // %vgpr0 = V_MOV_B32_e32 1, implicit %exec
1147  MachineOperand &Dst = MI.getOperand(0);
1148  if (Dst.isReg() &&
1150  continue;
1151 
1152  foldInstOperand(MI, OpToFold);
1153  }
1154  }
1155  return false;
1156 }
static bool isReg(const MCInst &MI, unsigned OpNo)
unsigned getNumImplicitUses() const
Return the number of implicit uses this instruction has.
Definition: MCInstrDesc.h:521
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
bool use_nodbg_empty(unsigned RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register...
AMDGPU specific subclass of TargetSubtarget.
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
unsigned getNumImplicitDefs() const
Return the number of implicit defs this instruction has.
Definition: MCInstrDesc.h:543
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, unsigned Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before...
static bool isUseMIInFoldList(ArrayRef< FoldCandidate > FoldList, const MachineInstr *MI)
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO is a legal operand if it was the OpIdx Operand for MI.
static bool isUseSafeToFold(const SIInstrInfo *TII, const MachineInstr &MI, const MachineOperand &UseMO)
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:383
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:163
unsigned getReg() const
getReg - Returns the register number.
unsigned getOperandNo(const_mop_iterator I) const
Returns the number of the operand iterator I points to.
Definition: MachineInstr.h:509
void setIsUndef(bool Val=true)
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
unsigned Reg
unsigned getSubReg() const
unsigned getRegBitWidth(unsigned RCID)
Get the size in bits of a register from the register class RC.
bool isRegSequence() const
unsigned const TargetRegisterInfo * TRI
F(f)
void substVirtReg(unsigned Reg, unsigned SubIdx, const TargetRegisterInfo &)
substVirtReg - Substitute the current register with the virtual subregister Reg:SubReg.
bool isInlineConstant(const APInt &Imm) const
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:154
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction...
Definition: MachineInstr.h:700
static unsigned getMovOpc(bool IsScalar)
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:516
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:210
const HexagonInstrInfo * TII
bool isSGPRReg(const MachineRegisterInfo &MRI, unsigned Reg) const
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:412
static bool hasOneNonDBGUseInst(const MachineRegisterInfo &MRI, unsigned Reg)
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
static bool tryAddToFoldList(SmallVectorImpl< FoldCandidate > &FoldList, MachineInstr *MI, unsigned OpNo, MachineOperand *OpToFold, const SIInstrInfo *TII)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
static MachineOperand * getImmOrMaterializedImm(MachineRegisterInfo &MRI, MachineOperand &Op)
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:409
unsigned getID() const
Return the register class ID number.
static int getOModValue(unsigned Opc, int64_t Val)
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
uint8_t OperandType
Information about the type of the operand.
Definition: MCInstrDesc.h:79
bool isVGPR(const MachineRegisterInfo &MRI, unsigned Reg) const
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:406
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:48
void ChangeToImmediate(int64_t ImmVal)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value...
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static const unsigned CommuteAnyOperandIndex
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned const MachineRegisterInfo * MRI
static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
bool isVariadic() const
Return true if this instruction can have a variable number of operands.
Definition: MCInstrDesc.h:234
MachineInstrBuilder & UseMI
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Register is known to be fully dead.
Represent the analysis usage information of a pass.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
void setImm(int64_t immVal)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
bool isCopy() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
#define DEBUG_TYPE
void setIsKill(bool Val=true)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:34
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
LLVM_READONLY int getVOPe32(uint16_t Opcode)
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
static bool updateOperand(FoldCandidate &Fold, const SIInstrInfo &TII, const TargetRegisterInfo &TRI)
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:286
static bool tryFoldInst(const SIInstrInfo *TII, MachineInstr *MI)
int64_t getImm() const
static void stripExtraCopyOperands(MachineInstr &MI)
Remove any leftover implicit operands from mutating the instruction.
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
The access may modify the value stored in memory.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:941
Class for arbitrary precision integers.
Definition: APInt.h:70
static bool evalBinaryInstruction(unsigned Opcode, int32_t &Result, uint32_t LHS, uint32_t RHS)
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:254
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
Definition: MachineInstr.h:64
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void initializeSIFoldOperandsPass(PassRegistry &)
char & SIFoldOperandsID
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Interface definition for SIInstrInfo.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Definition: MCInstrDesc.h:73
FunctionPass * createSIFoldOperandsPass()
static unsigned UseReg(const MachineOperand &MO)
void setReg(unsigned Reg)
Change the register this operand corresponds to.
static MachineOperand CreateImm(int64_t Val)
#define I(x, y, z)
Definition: MD5.cpp:58
void setSubReg(unsigned subReg)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
static bool isInlineConstantIfFolded(const SIInstrInfo *TII, const MachineInstr &UseMI, unsigned OpNo, const MachineOperand &OpToFold)
Abstract Stack Frame Index.
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:346
bool isReg() const
isReg - Tests if this is a MO_Register operand.
iterator_range< df_iterator< T > > depth_first(const T &G)
const unsigned Kind
static use_instr_nodbg_iterator use_instr_nodbg_end()
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
use_instr_nodbg_iterator use_instr_nodbg_begin(unsigned RegNo) const
static bool isSDWA(const MachineInstr &MI)
Definition: SIInstrInfo.h:393
const MCOperandInfo * OpInfo
Definition: MCInstrDesc.h:174
void ChangeToFrameIndex(int Idx)
Replace this operand with a frame index.
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
void RemoveOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with...
static bool tryConstantFoldOp(MachineRegisterInfo &MRI, const SIInstrInfo *TII, MachineInstr *MI, MachineOperand *ImmOp)
#define LLVM_DEBUG(X)
Definition: Debug.h:123
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:414
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
Definition: MachineInstr.h:295
bool isCommutable(QueryType Type=IgnoreBundle) const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z, ..."), which produces the same result if Y and Z are exchanged.
Definition: MachineInstr.h:848
bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
bool isImplicit() const
const SIRegisterInfo * getRegisterInfo() const override