SIFoldOperands.cpp (LLVM 7.0.0svn)
//===-- SIFoldOperands.cpp - Fold operands --- ----------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
//===----------------------------------------------------------------------===//
//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "si-fold-operands"
using namespace llvm;

namespace {

struct FoldCandidate {
  MachineInstr *UseMI;
  union {
    MachineOperand *OpToFold;
    uint64_t ImmToFold;
    int FrameIndexToFold;
  };
  unsigned char UseOpNo;
  MachineOperand::MachineOperandType Kind;
  bool Commuted;

  FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp,
                bool Commuted_ = false) :
    UseMI(MI), OpToFold(nullptr), UseOpNo(OpNo), Kind(FoldOp->getType()),
    Commuted(Commuted_) {
    if (FoldOp->isImm()) {
      ImmToFold = FoldOp->getImm();
    } else if (FoldOp->isFI()) {
      FrameIndexToFold = FoldOp->getIndex();
    } else {
      assert(FoldOp->isReg());
      OpToFold = FoldOp;
    }
  }

  bool isFI() const {
    return Kind == MachineOperand::MO_FrameIndex;
  }

  bool isImm() const {
    return Kind == MachineOperand::MO_Immediate;
  }

  bool isReg() const {
    return Kind == MachineOperand::MO_Register;
  }

  bool isCommuted() const {
    return Commuted;
  }
};

class SIFoldOperands : public MachineFunctionPass {
public:
  static char ID;
  MachineRegisterInfo *MRI;
  const SIInstrInfo *TII;
  const SIRegisterInfo *TRI;
  const SISubtarget *ST;

  void foldOperand(MachineOperand &OpToFold,
                   MachineInstr *UseMI,
                   unsigned UseOpIdx,
                   SmallVectorImpl<FoldCandidate> &FoldList,
                   SmallVectorImpl<MachineInstr *> &CopiesToReplace) const;

  void foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const;

  const MachineOperand *isClamp(const MachineInstr &MI) const;
  bool tryFoldClamp(MachineInstr &MI);

  std::pair<const MachineOperand *, int> isOMod(const MachineInstr &MI) const;
  bool tryFoldOMod(MachineInstr &MI);

public:
  SIFoldOperands() : MachineFunctionPass(ID) {
    initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "SI Fold Operands"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.

INITIALIZE_PASS(SIFoldOperands, DEBUG_TYPE,
                "SI Fold Operands", false, false)

char SIFoldOperands::ID = 0;

char &llvm::SIFoldOperandsID = SIFoldOperands::ID;

// Wrapper around isInlineConstant that understands special cases when
// instruction types are replaced during operand folding.
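// For example (illustrative): folding an immediate into the src2 operand of a
// V_MAC_F32_e64 turns the instruction into V_MAD_F32, so the immediate has to
// be inline-legal for the corresponding V_MAD operand rather than for the
// original MAC operand.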
static bool isInlineConstantIfFolded(const SIInstrInfo *TII,
                                     const MachineInstr &UseMI,
                                     unsigned OpNo,
                                     const MachineOperand &OpToFold) {
  if (TII->isInlineConstant(UseMI, OpNo, OpToFold))
    return true;

  unsigned Opc = UseMI.getOpcode();
  switch (Opc) {
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_F16_e64: {
    // Special case for mac. Since this is replaced with mad when folded into
    // src2, we need to check the legality for the final instruction.
    int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
    if (static_cast<int>(OpNo) == Src2Idx) {
      bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64;
      const MCInstrDesc &MadDesc
        = TII->get(IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
      return TII->isInlineConstant(OpToFold, MadDesc.OpInfo[OpNo].OperandType);
    }
    return false;
  }
  default:
    return false;
  }
}

FunctionPass *llvm::createSIFoldOperandsPass() {
  return new SIFoldOperands();
}

static bool updateOperand(FoldCandidate &Fold,
                          const TargetRegisterInfo &TRI) {
  MachineInstr *MI = Fold.UseMI;
  MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
  assert(Old.isReg());

  if (Fold.isImm()) {
    if (MI->getDesc().TSFlags & SIInstrFlags::IsPacked) {
      // Set op_sel/op_sel_hi on this operand or bail out if op_sel is
      // already set.
      unsigned Opcode = MI->getOpcode();
      int OpNo = MI->getOperandNo(&Old);
      int ModIdx = -1;
      if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0))
        ModIdx = AMDGPU::OpName::src0_modifiers;
      else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1))
        ModIdx = AMDGPU::OpName::src1_modifiers;
      else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2))
        ModIdx = AMDGPU::OpName::src2_modifiers;
      assert(ModIdx != -1);
      ModIdx = AMDGPU::getNamedOperandIdx(Opcode, ModIdx);
      MachineOperand &Mod = MI->getOperand(ModIdx);
      unsigned Val = Mod.getImm();
      if ((Val & SISrcMods::OP_SEL_0) || !(Val & SISrcMods::OP_SEL_1))
        return false;
      // If upper part is all zero we do not need op_sel_hi.
      if (!isUInt<16>(Fold.ImmToFold)) {
        if (!(Fold.ImmToFold & 0xffff)) {
          Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0);
          Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
          Old.ChangeToImmediate(Fold.ImmToFold >> 16);
          return true;
        }
        Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
      }
    }
    Old.ChangeToImmediate(Fold.ImmToFold);
    return true;
  }

  if (Fold.isFI()) {
    Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
    return true;
  }

  MachineOperand *New = Fold.OpToFold;
  if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
      TargetRegisterInfo::isVirtualRegister(New->getReg())) {
    Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);

    Old.setIsUndef(New->isUndef());
    return true;
  }

  // FIXME: Handle physical registers.

  return false;
}

static bool isUseMIInFoldList(ArrayRef<FoldCandidate> FoldList,
                              const MachineInstr *MI) {
  for (auto Candidate : FoldList) {
    if (Candidate.UseMI == MI)
      return true;
  }
  return false;
}

static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
                             MachineInstr *MI, unsigned OpNo,
                             MachineOperand *OpToFold,
                             const SIInstrInfo *TII) {
  if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {

    // Special case for v_mac_{f16, f32}_e64 if we are trying to fold into src2
    unsigned Opc = MI->getOpcode();
    if ((Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64) &&
        (int)OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)) {
      bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64;

      // Check if changing this to a v_mad_{f16, f32} instruction will allow us
      // to fold the operand.
      MI->setDesc(TII->get(IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16));
      bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold, TII);
      if (FoldAsMAD) {
        MI->untieRegOperand(OpNo);
        return true;
      }
      MI->setDesc(TII->get(Opc));
    }

    // Special case for s_setreg_b32
    if (Opc == AMDGPU::S_SETREG_B32 && OpToFold->isImm()) {
      MI->setDesc(TII->get(AMDGPU::S_SETREG_IMM32_B32));
      FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
      return true;
    }

    // If we are already folding into another operand of MI, then
    // we can't commute the instruction, otherwise we risk making the
    // other fold illegal.
    if (isUseMIInFoldList(FoldList, MI))
      return false;

    // Operand is not legal, so try to commute the instruction to
    // see if this makes it possible to fold.
    unsigned CommuteIdx0 = TargetInstrInfo::CommuteAnyOperandIndex;
    unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
    bool CanCommute = TII->findCommutedOpIndices(*MI, CommuteIdx0, CommuteIdx1);

    if (CanCommute) {
      if (CommuteIdx0 == OpNo)
        OpNo = CommuteIdx1;
      else if (CommuteIdx1 == OpNo)
        OpNo = CommuteIdx0;
    }

    // One of operands might be an Imm operand, and OpNo may refer to it after
    // the call of commuteInstruction() below. Such situations are avoided
    // here explicitly as OpNo must be a register operand to be a candidate
    // for memory folding.
    if (CanCommute && (!MI->getOperand(CommuteIdx0).isReg() ||
                       !MI->getOperand(CommuteIdx1).isReg()))
      return false;

    if (!CanCommute ||
        !TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1))
      return false;

    if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {
      TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1);
      return false;
    }

    FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold, true));
    return true;
  }

  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
  return true;
}

// If the use operand doesn't care about the value, this may be an operand only
// used for register indexing, in which case it is unsafe to fold.
static bool isUseSafeToFold(const SIInstrInfo *TII,
                            const MachineInstr &MI,
                            const MachineOperand &UseMO) {
  return !UseMO.isUndef() && !TII->isSDWA(MI);
  //return !MI.hasRegisterImplicitUseOperand(UseMO.getReg());
}

void SIFoldOperands::foldOperand(
  MachineOperand &OpToFold,
  MachineInstr *UseMI,
  unsigned UseOpIdx,
  SmallVectorImpl<FoldCandidate> &FoldList,
  SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
  const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);

  if (!isUseSafeToFold(TII, *UseMI, UseOp))
    return;

  // FIXME: Fold operands with subregs.
  if (UseOp.isReg() && OpToFold.isReg()) {
    if (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister)
      return;

    // Don't fold subregister extracts into tied operands, only if it is a full
    // copy since a subregister use tied to a full register def doesn't really
    // make sense. e.g. don't fold:
    //
    // %1 = COPY %0:sub1
    // %2<tied3> = V_MAC_{F16, F32} %3, %4, %1<tied0>
    //
    // into
    // %2<tied3> = V_MAC_{F16, F32} %3, %4, %0:sub1<tied0>
    if (UseOp.isTied() && OpToFold.getSubReg() != AMDGPU::NoSubRegister)
      return;
  }

  // Special case for REG_SEQUENCE: We can't fold literals into
  // REG_SEQUENCE instructions, so we have to fold them into the
  // uses of REG_SEQUENCE.
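  // For example (illustrative): a constant feeding
  //   %vec = REG_SEQUENCE %k, %subreg.sub0, %other, %subreg.sub1
  // is not folded into the REG_SEQUENCE itself; instead it is folded into the
  // instructions that read %vec through the matching subregister.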
  if (UseMI->isRegSequence()) {
    unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
    unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();

    for (MachineRegisterInfo::use_iterator
           RSUse = MRI->use_begin(RegSeqDstReg), RSE = MRI->use_end();
         RSUse != RSE; ++RSUse) {

      MachineInstr *RSUseMI = RSUse->getParent();
      if (RSUse->getSubReg() != RegSeqDstSubReg)
        continue;

      foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
                  CopiesToReplace);
    }

    return;
  }


  bool FoldingImm = OpToFold.isImm();

  // In order to fold immediates into copies, we need to change the
  // copy to a MOV.
  if (FoldingImm && UseMI->isCopy()) {
    unsigned DestReg = UseMI->getOperand(0).getReg();
    const TargetRegisterClass *DestRC
      = TargetRegisterInfo::isVirtualRegister(DestReg) ?
      MRI->getRegClass(DestReg) :
      TRI->getPhysRegClass(DestReg);

    unsigned MovOp = TII->getMovOpcode(DestRC);
    if (MovOp == AMDGPU::COPY)
      return;

    UseMI->setDesc(TII->get(MovOp));
    CopiesToReplace.push_back(UseMI);
  } else {
    const MCInstrDesc &UseDesc = UseMI->getDesc();

    // Don't fold into target independent nodes. Target independent opcodes
    // don't have defined register classes.
    if (UseDesc.isVariadic() ||
        UseOp.isImplicit() ||
        UseDesc.OpInfo[UseOpIdx].RegClass == -1)
      return;
  }

  if (!FoldingImm) {
    tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);

    // FIXME: We could try to change the instruction from 64-bit to 32-bit
    // to enable more folding opportunities. The shrink operands pass
    // already does this.
    return;
  }


  const MCInstrDesc &FoldDesc = OpToFold.getParent()->getDesc();
  const TargetRegisterClass *FoldRC =
    TRI->getRegClass(FoldDesc.OpInfo[0].RegClass);


  // Split 64-bit constants into 32-bits for folding.
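  // For example (illustrative): if the defining instruction materializes the
  // 64-bit constant 0x0000000100000002, a use through sub0 folds the low word
  // (2) and a use through sub1 folds the high word (1).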
  if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) {
    unsigned UseReg = UseOp.getReg();
    const TargetRegisterClass *UseRC
      = TargetRegisterInfo::isVirtualRegister(UseReg) ?
      MRI->getRegClass(UseReg) :
      TRI->getPhysRegClass(UseReg);

    if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64)
      return;

    APInt Imm(64, OpToFold.getImm());
    if (UseOp.getSubReg() == AMDGPU::sub0) {
      Imm = Imm.getLoBits(32);
    } else {
      assert(UseOp.getSubReg() == AMDGPU::sub1);
      Imm = Imm.getHiBits(32);
    }

    MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
    tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
    return;
  }


  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
}

static bool evalBinaryInstruction(unsigned Opcode, int32_t &Result,
                                  uint32_t LHS, uint32_t RHS) {
  switch (Opcode) {
  case AMDGPU::V_AND_B32_e64:
  case AMDGPU::V_AND_B32_e32:
  case AMDGPU::S_AND_B32:
    Result = LHS & RHS;
    return true;
  case AMDGPU::V_OR_B32_e64:
  case AMDGPU::V_OR_B32_e32:
  case AMDGPU::S_OR_B32:
    Result = LHS | RHS;
    return true;
  case AMDGPU::V_XOR_B32_e64:
  case AMDGPU::V_XOR_B32_e32:
  case AMDGPU::S_XOR_B32:
    Result = LHS ^ RHS;
    return true;
  case AMDGPU::V_LSHL_B32_e64:
  case AMDGPU::V_LSHL_B32_e32:
  case AMDGPU::S_LSHL_B32:
    // The instruction ignores the high bits for out of bounds shifts.
    Result = LHS << (RHS & 31);
    return true;
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32:
    Result = RHS << (LHS & 31);
    return true;
  case AMDGPU::V_LSHR_B32_e64:
  case AMDGPU::V_LSHR_B32_e32:
  case AMDGPU::S_LSHR_B32:
    Result = LHS >> (RHS & 31);
    return true;
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32:
    Result = RHS >> (LHS & 31);
    return true;
  case AMDGPU::V_ASHR_I32_e64:
  case AMDGPU::V_ASHR_I32_e32:
  case AMDGPU::S_ASHR_I32:
    Result = static_cast<int32_t>(LHS) >> (RHS & 31);
    return true;
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32:
    Result = static_cast<int32_t>(RHS) >> (LHS & 31);
    return true;
  default:
    return false;
  }
}

static unsigned getMovOpc(bool IsScalar) {
  return IsScalar ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
}

/// Remove any leftover implicit operands from mutating the instruction. e.g.
/// if we replace an s_and_b32 with a copy, we don't need the implicit scc def
/// anymore.
static void stripExtraCopyOperands(MachineInstr &MI) {
  const MCInstrDesc &Desc = MI.getDesc();
  unsigned NumOps = Desc.getNumOperands() +
                    Desc.getNumImplicitUses() +
                    Desc.getNumImplicitDefs();

  for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
    MI.RemoveOperand(I);
}

static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) {
  MI.setDesc(NewDesc);
  stripExtraCopyOperands(MI);
}

static MachineOperand *getImmOrMaterializedImm(MachineRegisterInfo &MRI,
                                               MachineOperand &Op) {
  if (Op.isReg()) {
    // If this has a subregister, it obviously is a register source.
    if (Op.getSubReg() != AMDGPU::NoSubRegister ||
        !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
      return &Op;

    MachineInstr *Def = MRI.getVRegDef(Op.getReg());
    if (Def && Def->isMoveImmediate()) {
      MachineOperand &ImmSrc = Def->getOperand(1);
      if (ImmSrc.isImm())
        return &ImmSrc;
    }
  }

  return &Op;
}

// Try to simplify operations with a constant that may appear after instruction
// selection.
// TODO: See if a frame index with a fixed offset can fold.
static bool tryConstantFoldOp(MachineRegisterInfo &MRI,
                              const SIInstrInfo *TII,
                              MachineInstr *MI,
                              MachineOperand *ImmOp) {
  unsigned Opc = MI->getOpcode();
  if (Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
      Opc == AMDGPU::S_NOT_B32) {
    MI->getOperand(1).ChangeToImmediate(~ImmOp->getImm());
    mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_NOT_B32)));
    return true;
  }

  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
  if (Src1Idx == -1)
    return false;

  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  MachineOperand *Src0 = getImmOrMaterializedImm(MRI, MI->getOperand(Src0Idx));
  MachineOperand *Src1 = getImmOrMaterializedImm(MRI, MI->getOperand(Src1Idx));

  if (!Src0->isImm() && !Src1->isImm())
    return false;

  // and k0, k1 -> v_mov_b32 (k0 & k1)
  // or k0, k1 -> v_mov_b32 (k0 | k1)
  // xor k0, k1 -> v_mov_b32 (k0 ^ k1)
  if (Src0->isImm() && Src1->isImm()) {
    int32_t NewImm;
    if (!evalBinaryInstruction(Opc, NewImm, Src0->getImm(), Src1->getImm()))
      return false;

    const SIRegisterInfo &TRI = TII->getRegisterInfo();
    bool IsSGPR = TRI.isSGPRReg(MRI, MI->getOperand(0).getReg());

    // Be careful to change the right operand, src0 may belong to a different
    // instruction.
    MI->getOperand(Src0Idx).ChangeToImmediate(NewImm);
    MI->RemoveOperand(Src1Idx);
    mutateCopyOp(*MI, TII->get(getMovOpc(IsSGPR)));
    return true;
  }

  if (!MI->isCommutable())
    return false;

  if (Src0->isImm() && !Src1->isImm()) {
    std::swap(Src0, Src1);
    std::swap(Src0Idx, Src1Idx);
  }

  int32_t Src1Val = static_cast<int32_t>(Src1->getImm());
  if (Opc == AMDGPU::V_OR_B32_e64 ||
      Opc == AMDGPU::V_OR_B32_e32 ||
      Opc == AMDGPU::S_OR_B32) {
    if (Src1Val == 0) {
      // y = or x, 0 => y = copy x
      MI->RemoveOperand(Src1Idx);
      mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
    } else if (Src1Val == -1) {
      // y = or x, -1 => y = v_mov_b32 -1
      MI->RemoveOperand(Src1Idx);
      mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_OR_B32)));
    } else
      return false;

    return true;
  }

  if (MI->getOpcode() == AMDGPU::V_AND_B32_e64 ||
      MI->getOpcode() == AMDGPU::V_AND_B32_e32 ||
      MI->getOpcode() == AMDGPU::S_AND_B32) {
    if (Src1Val == 0) {
      // y = and x, 0 => y = v_mov_b32 0
      MI->RemoveOperand(Src0Idx);
      mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_AND_B32)));
    } else if (Src1Val == -1) {
      // y = and x, -1 => y = copy x
      MI->RemoveOperand(Src1Idx);
      mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
      stripExtraCopyOperands(*MI);
    } else
      return false;

    return true;
  }

  if (MI->getOpcode() == AMDGPU::V_XOR_B32_e64 ||
      MI->getOpcode() == AMDGPU::V_XOR_B32_e32 ||
      MI->getOpcode() == AMDGPU::S_XOR_B32) {
    if (Src1Val == 0) {
      // y = xor x, 0 => y = copy x
      MI->RemoveOperand(Src1Idx);
      mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
      return true;
    }
  }

  return false;
}

// Try to fold an instruction into a simpler one
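// For example (illustrative): a V_CNDMASK_B32 whose src0 and src1 are
// identical produces that value regardless of the condition, so it can be
// rewritten as a plain COPY (or a move if the source is an immediate).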
static bool tryFoldInst(const SIInstrInfo *TII,
                        MachineInstr *MI) {
  unsigned Opc = MI->getOpcode();

  if (Opc == AMDGPU::V_CNDMASK_B32_e32 ||
      Opc == AMDGPU::V_CNDMASK_B32_e64 ||
      Opc == AMDGPU::V_CNDMASK_B64_PSEUDO) {
    const MachineOperand *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
    const MachineOperand *Src1 = TII->getNamedOperand(*MI, AMDGPU::OpName::src1);
    if (Src1->isIdenticalTo(*Src0)) {
      DEBUG(dbgs() << "Folded " << *MI << " into ");
      int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
      if (Src2Idx != -1)
        MI->RemoveOperand(Src2Idx);
      MI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1));
      mutateCopyOp(*MI, TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY
                                               : getMovOpc(false)));
      DEBUG(dbgs() << *MI << '\n');
      return true;
    }
  }

  return false;
}

void SIFoldOperands::foldInstOperand(MachineInstr &MI,
                                     MachineOperand &OpToFold) const {
  // We need to mutate the operands of new mov instructions to add implicit
  // uses of EXEC, but adding them invalidates the use_iterator, so defer
  // this.
  SmallVector<MachineInstr *, 4> CopiesToReplace;
  SmallVector<FoldCandidate, 4> FoldList;
  MachineOperand &Dst = MI.getOperand(0);

  bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
  if (FoldingImm) {
    unsigned NumLiteralUses = 0;
    MachineOperand *NonInlineUse = nullptr;
    int NonInlineUseOpNo = -1;

    MachineRegisterInfo::use_iterator NextUse;
    for (MachineRegisterInfo::use_iterator
           Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
         Use != E; Use = NextUse) {
      NextUse = std::next(Use);
      MachineInstr *UseMI = Use->getParent();
      unsigned OpNo = Use.getOperandNo();

      // Folding the immediate may reveal operations that can be constant
      // folded or replaced with a copy. This can happen for example after
      // frame indices are lowered to constants or from splitting 64-bit
      // constants.
      //
      // We may also encounter cases where one or both operands are
      // immediates materialized into a register, which would ordinarily not
      // be folded due to multiple uses or operand constraints.

      if (OpToFold.isImm() && tryConstantFoldOp(*MRI, TII, UseMI, &OpToFold)) {
        DEBUG(dbgs() << "Constant folded " << *UseMI << '\n');

        // Some constant folding cases change the same immediate's use to a new
        // instruction, e.g. and x, 0 -> 0. Make sure we re-visit the user
        // again. The same constant folded instruction could also have a second
        // use operand.
        NextUse = MRI->use_begin(Dst.getReg());
        FoldList.clear();
        continue;
      }

      // Try to fold any inline immediate uses, and then only fold other
      // constants if they have one use.
      //
      // The legality of the inline immediate must be checked based on the use
      // operand, not the defining instruction, because 32-bit instructions
      // with 32-bit inline immediate sources may be used to materialize
      // constants used in 16-bit operands.
      //
      // e.g. it is unsafe to fold:
      //  s_mov_b32 s0, 1.0    // materializes 0x3f800000
      //  v_add_f16 v0, v1, s0 // 1.0 f16 inline immediate sees 0x00003c00

      // Folding immediates with more than one use will increase program size.
      // FIXME: This will also reduce register usage, which may be better
      // in some cases. A better heuristic is needed.
      if (isInlineConstantIfFolded(TII, *UseMI, OpNo, OpToFold)) {
        foldOperand(OpToFold, UseMI, OpNo, FoldList, CopiesToReplace);
      } else {
        if (++NumLiteralUses == 1) {
          NonInlineUse = &*Use;
          NonInlineUseOpNo = OpNo;
        }
      }
    }

    if (NumLiteralUses == 1) {
      MachineInstr *UseMI = NonInlineUse->getParent();
      foldOperand(OpToFold, UseMI, NonInlineUseOpNo, FoldList, CopiesToReplace);
    }
  } else {
    // Folding register.
    for (MachineRegisterInfo::use_iterator
           Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
         Use != E; ++Use) {
      MachineInstr *UseMI = Use->getParent();

      foldOperand(OpToFold, UseMI, Use.getOperandNo(),
                  FoldList, CopiesToReplace);
    }
  }

  MachineFunction *MF = MI.getParent()->getParent();
  // Make sure we add EXEC uses to any new v_mov instructions created.
  for (MachineInstr *Copy : CopiesToReplace)
    Copy->addImplicitDefUseOperands(*MF);

  for (FoldCandidate &Fold : FoldList) {
    if (updateOperand(Fold, *TRI)) {
      // Clear kill flags.
      if (Fold.isReg()) {
        assert(Fold.OpToFold && Fold.OpToFold->isReg());
        // FIXME: Probably shouldn't bother trying to fold if not an
        // SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR
        // copies.
        MRI->clearKillFlags(Fold.OpToFold->getReg());
      }
      DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
            static_cast<int>(Fold.UseOpNo) << " of " << *Fold.UseMI << '\n');
      tryFoldInst(TII, Fold.UseMI);
    } else if (Fold.isCommuted()) {
      // Restoring instruction's original operand order if fold has failed.
      TII->commuteInstruction(*Fold.UseMI, false);
    }
  }
}

// Clamp patterns are canonically selected to v_max_* instructions, so only
// handle them.
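// For example (illustrative):
//   %x = V_ADD_F32_e64 ...
//   %y = V_MAX_F32_e64 0, %x, 0, %x, 1 /*clamp*/, 0
// folds the clamp bit into the V_ADD_F32 defining %x and deletes the max.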
const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
  unsigned Op = MI.getOpcode();
  switch (Op) {
  case AMDGPU::V_MAX_F32_e64:
  case AMDGPU::V_MAX_F16_e64:
  case AMDGPU::V_MAX_F64:
  case AMDGPU::V_PK_MAX_F16: {
    if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())
      return nullptr;

    // Make sure sources are identical.
    const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
    const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
    if (!Src0->isReg() || !Src1->isReg() ||
        Src0->getReg() != Src1->getReg() ||
        Src0->getSubReg() != Src1->getSubReg() ||
        Src0->getSubReg() != AMDGPU::NoSubRegister)
      return nullptr;

    // Can't fold up if we have modifiers.
    if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
      return nullptr;

    unsigned Src0Mods
      = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm();
    unsigned Src1Mods
      = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm();

    // Having a 0 op_sel_hi would require swizzling the output in the source
    // instruction, which we can't do.
    unsigned UnsetMods = (Op == AMDGPU::V_PK_MAX_F16) ? SISrcMods::OP_SEL_1 : 0;
    if (Src0Mods != UnsetMods && Src1Mods != UnsetMods)
      return nullptr;
    return Src0;
  }
  default:
    return nullptr;
  }
}

// We obviously have multiple uses in a clamp since the register is used twice
// in the same instruction.
static bool hasOneNonDBGUseInst(const MachineRegisterInfo &MRI, unsigned Reg) {
  int Count = 0;
  for (auto I = MRI.use_instr_nodbg_begin(Reg), E = MRI.use_instr_nodbg_end();
       I != E; ++I) {
    if (++Count > 1)
      return false;
  }

  return true;
}

// FIXME: Clamp for v_mad_mixhi_f16 handled during isel.
bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) {
  const MachineOperand *ClampSrc = isClamp(MI);
  if (!ClampSrc || !hasOneNonDBGUseInst(*MRI, ClampSrc->getReg()))
    return false;

  MachineInstr *Def = MRI->getVRegDef(ClampSrc->getReg());

  // The type of clamp must be compatible.
  if (TII->getClampMask(*Def) != TII->getClampMask(MI))
    return false;

  MachineOperand *DefClamp = TII->getNamedOperand(*Def, AMDGPU::OpName::clamp);
  if (!DefClamp)
    return false;

  DEBUG(dbgs() << "Folding clamp " << *DefClamp << " into " << *Def << '\n');

  // Clamp is applied after omod, so it is OK if omod is set.
  DefClamp->setImm(1);
  MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
  MI.eraseFromParent();
  return true;
}

static int getOModValue(unsigned Opc, int64_t Val) {
  switch (Opc) {
  case AMDGPU::V_MUL_F32_e64: {
    switch (static_cast<uint32_t>(Val)) {
    case 0x3f000000: // 0.5
      return SIOutMods::DIV2;
    case 0x40000000: // 2.0
      return SIOutMods::MUL2;
    case 0x40800000: // 4.0
      return SIOutMods::MUL4;
    default:
      return SIOutMods::NONE;
    }
  }
  case AMDGPU::V_MUL_F16_e64: {
    switch (static_cast<uint16_t>(Val)) {
    case 0x3800: // 0.5
      return SIOutMods::DIV2;
    case 0x4000: // 2.0
      return SIOutMods::MUL2;
    case 0x4400: // 4.0
      return SIOutMods::MUL4;
    default:
      return SIOutMods::NONE;
    }
  }
  default:
    llvm_unreachable("invalid mul opcode");
  }
}

// FIXME: Does this really not support denormals with f16?
// FIXME: Does this need to check IEEE mode bit? SNaNs are generally not
// handled, so will anything other than that break?
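// For example (illustrative): a multiply by the inline constant 2.0, such as
//   %y = V_MUL_F32_e64 0, 2.0, 0, %x, 0, 0
// can be removed by setting omod = MUL2 on the instruction that defines %x.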
std::pair<const MachineOperand *, int>
SIFoldOperands::isOMod(const MachineInstr &MI) const {
  unsigned Op = MI.getOpcode();
  switch (Op) {
  case AMDGPU::V_MUL_F32_e64:
  case AMDGPU::V_MUL_F16_e64: {
    // If output denormals are enabled, omod is ignored.
    if ((Op == AMDGPU::V_MUL_F32_e64 && ST->hasFP32Denormals()) ||
        (Op == AMDGPU::V_MUL_F16_e64 && ST->hasFP16Denormals()))
      return std::make_pair(nullptr, SIOutMods::NONE);

    const MachineOperand *RegOp = nullptr;
    const MachineOperand *ImmOp = nullptr;
    const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
    const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
    if (Src0->isImm()) {
      ImmOp = Src0;
      RegOp = Src1;
    } else if (Src1->isImm()) {
      ImmOp = Src1;
      RegOp = Src0;
    } else
      return std::make_pair(nullptr, SIOutMods::NONE);

    int OMod = getOModValue(Op, ImmOp->getImm());
    if (OMod == SIOutMods::NONE ||
        TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
        TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
        TII->hasModifiersSet(MI, AMDGPU::OpName::omod) ||
        TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))
      return std::make_pair(nullptr, SIOutMods::NONE);

    return std::make_pair(RegOp, OMod);
  }
  case AMDGPU::V_ADD_F32_e64:
  case AMDGPU::V_ADD_F16_e64: {
    // If output denormals are enabled, omod is ignored.
    if ((Op == AMDGPU::V_ADD_F32_e64 && ST->hasFP32Denormals()) ||
        (Op == AMDGPU::V_ADD_F16_e64 && ST->hasFP16Denormals()))
      return std::make_pair(nullptr, SIOutMods::NONE);

    // Look through the DAGCombiner canonicalization fmul x, 2 -> fadd x, x
    const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
    const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);

    if (Src0->isReg() && Src1->isReg() && Src0->getReg() == Src1->getReg() &&
        Src0->getSubReg() == Src1->getSubReg() &&
        !TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) &&
        !TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) &&
        !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) &&
        !TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
      return std::make_pair(Src0, SIOutMods::MUL2);

    return std::make_pair(nullptr, SIOutMods::NONE);
  }
  default:
    return std::make_pair(nullptr, SIOutMods::NONE);
  }
}

// FIXME: Does this need to check IEEE bit on function?
bool SIFoldOperands::tryFoldOMod(MachineInstr &MI) {
  const MachineOperand *RegOp;
  int OMod;
  std::tie(RegOp, OMod) = isOMod(MI);
  if (OMod == SIOutMods::NONE || !RegOp->isReg() ||
      RegOp->getSubReg() != AMDGPU::NoSubRegister ||
      !hasOneNonDBGUseInst(*MRI, RegOp->getReg()))
    return false;

  MachineInstr *Def = MRI->getVRegDef(RegOp->getReg());
  MachineOperand *DefOMod = TII->getNamedOperand(*Def, AMDGPU::OpName::omod);
  if (!DefOMod || DefOMod->getImm() != SIOutMods::NONE)
    return false;

  // Clamp is applied after omod. If the source already has clamp set, don't
  // fold it.
  if (TII->hasModifiersSet(*Def, AMDGPU::OpName::clamp))
    return false;

  DEBUG(dbgs() << "Folding omod " << MI << " into " << *Def << '\n');

  DefOMod->setImm(OMod);
  MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
  MI.eraseFromParent();
  return true;
}

bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  MRI = &MF.getRegInfo();
  ST = &MF.getSubtarget<SISubtarget>();
  TII = ST->getInstrInfo();
  TRI = &TII->getRegisterInfo();

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // omod is ignored by hardware if IEEE bit is enabled. omod also does not
  // correctly handle signed zeros.
  //
  // TODO: Check nsz on instructions when fast math flags are preserved to MI
  // level.
  bool IsIEEEMode = ST->enableIEEEBit(MF) || !MFI->hasNoSignedZerosFPMath();

  for (MachineBasicBlock *MBB : depth_first(&MF)) {
    MachineBasicBlock::iterator I, Next;
    for (I = MBB->begin(); I != MBB->end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      tryFoldInst(TII, &MI);

      if (!TII->isFoldableCopy(MI)) {
        if (IsIEEEMode || !tryFoldOMod(MI))
          tryFoldClamp(MI);
        continue;
      }

      MachineOperand &OpToFold = MI.getOperand(1);
      bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();

      // FIXME: We could also be folding things like TargetIndexes.
      if (!FoldingImm && !OpToFold.isReg())
        continue;

      if (OpToFold.isReg() &&
          !TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()))
        continue;

      // Prevent folding operands backwards in the function. For example,
      // the COPY opcode must not be replaced by 1 in this example:
      //
      // %3 = COPY %vgpr0; VGPR_32:%3
      // ...
      // %vgpr0 = V_MOV_B32_e32 1, implicit %exec
      MachineOperand &Dst = MI.getOperand(0);
      if (Dst.isReg() &&
          !TargetRegisterInfo::isVirtualRegister(Dst.getReg()))
        continue;

      foldInstOperand(MI, OpToFold);
    }
  }
  return false;
}