SIFoldOperands.cpp (LLVM 7.0.0svn)
1 //===-- SIFoldOperands.cpp - Fold operands --------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 /// \file
9 //===----------------------------------------------------------------------===//
10 //
11 
12 #include "AMDGPU.h"
13 #include "AMDGPUSubtarget.h"
14 #include "SIInstrInfo.h"
15 #include "SIMachineFunctionInfo.h"
16 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
17 #include "llvm/ADT/DepthFirstIterator.h"
18 #include "llvm/CodeGen/LiveIntervals.h"
19 #include "llvm/CodeGen/MachineFunctionPass.h"
20 #include "llvm/CodeGen/MachineInstrBuilder.h"
21 #include "llvm/CodeGen/MachineRegisterInfo.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/raw_ostream.h"
24 #include "llvm/Target/TargetMachine.h"
25 
26 #define DEBUG_TYPE "si-fold-operands"
27 using namespace llvm;
28 
29 namespace {
30 
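// A deferred fold: OpToFold (a register operand, immediate or frame index)
// will be substituted into operand UseOpNo of UseMI once all candidates have
// been collected.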
31 struct FoldCandidate {
32  MachineInstr *UseMI;
33  union {
34  MachineOperand *OpToFold;
35  uint64_t ImmToFold;
36  int FrameIndexToFold;
37  };
38  unsigned char UseOpNo;
39  MachineOperand::MachineOperandType Kind;
40  bool Commuted;
41 
42  FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp,
43  bool Commuted_ = false) :
44  UseMI(MI), OpToFold(nullptr), UseOpNo(OpNo), Kind(FoldOp->getType()),
45  Commuted(Commuted_) {
46  if (FoldOp->isImm()) {
47  ImmToFold = FoldOp->getImm();
48  } else if (FoldOp->isFI()) {
49  FrameIndexToFold = FoldOp->getIndex();
50  } else {
51  assert(FoldOp->isReg());
52  OpToFold = FoldOp;
53  }
54  }
55 
56  bool isFI() const {
57  return Kind == MachineOperand::MO_FrameIndex;
58  }
59 
60  bool isImm() const {
61  return Kind == MachineOperand::MO_Immediate;
62  }
63 
64  bool isReg() const {
65  return Kind == MachineOperand::MO_Register;
66  }
67 
68  bool isCommuted() const {
69  return Commuted;
70  }
71 };
72 
73 class SIFoldOperands : public MachineFunctionPass {
74 public:
75  static char ID;
76  MachineRegisterInfo *MRI;
77  const SIInstrInfo *TII;
78  const SIRegisterInfo *TRI;
79  const GCNSubtarget *ST;
80 
81  void foldOperand(MachineOperand &OpToFold,
82  MachineInstr *UseMI,
83  unsigned UseOpIdx,
84  SmallVectorImpl<FoldCandidate> &FoldList,
85  SmallVectorImpl<MachineInstr *> &CopiesToReplace) const;
86 
87  void foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const;
88 
89  const MachineOperand *isClamp(const MachineInstr &MI) const;
90  bool tryFoldClamp(MachineInstr &MI);
91 
92  std::pair<const MachineOperand *, int> isOMod(const MachineInstr &MI) const;
93  bool tryFoldOMod(MachineInstr &MI);
94 
95 public:
96  SIFoldOperands() : MachineFunctionPass(ID) {
97  initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
98  }
99 
100  bool runOnMachineFunction(MachineFunction &MF) override;
101 
102  StringRef getPassName() const override { return "SI Fold Operands"; }
103 
104  void getAnalysisUsage(AnalysisUsage &AU) const override {
105  AU.setPreservesCFG();
106  MachineFunctionPass::getAnalysisUsage(AU);
107  }
108 };
109 
110 } // End anonymous namespace.
111 
112 INITIALIZE_PASS(SIFoldOperands, DEBUG_TYPE,
113  "SI Fold Operands", false, false)
114 
115 char SIFoldOperands::ID = 0;
116 
117 char &llvm::SIFoldOperandsID = SIFoldOperands::ID;
118 
119 // Wrapper around isInlineConstant that understands special cases when
120 // instruction types are replaced during operand folding.
121 static bool isInlineConstantIfFolded(const SIInstrInfo *TII,
122  const MachineInstr &UseMI,
123  unsigned OpNo,
124  const MachineOperand &OpToFold) {
125  if (TII->isInlineConstant(UseMI, OpNo, OpToFold))
126  return true;
127 
128  unsigned Opc = UseMI.getOpcode();
129  switch (Opc) {
130  case AMDGPU::V_MAC_F32_e64:
131  case AMDGPU::V_MAC_F16_e64:
132  case AMDGPU::V_FMAC_F32_e64: {
133  // Special case for mac. Since this is replaced with mad when folded into
134  // src2, we need to check the legality for the final instruction.
135  int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
136  if (static_cast<int>(OpNo) == Src2Idx) {
137  bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64;
138  bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64;
139 
140  unsigned Opc = IsFMA ?
141  AMDGPU::V_FMA_F32 : (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
142  const MCInstrDesc &MadDesc = TII->get(Opc);
143  return TII->isInlineConstant(OpToFold, MadDesc.OpInfo[OpNo].OperandType);
144  }
145  return false;
146  }
147  default:
148  return false;
149  }
150 }
151 
152 FunctionPass *llvm::createSIFoldOperandsPass() {
153  return new SIFoldOperands();
154 }
155 
156 static bool updateOperand(FoldCandidate &Fold,
157  const TargetRegisterInfo &TRI) {
158  MachineInstr *MI = Fold.UseMI;
159  MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
160  assert(Old.isReg());
161 
162  if (Fold.isImm()) {
163  if (MI->getDesc().TSFlags & SIInstrFlags::IsPacked) {
164  // Set op_sel/op_sel_hi on this operand or bail out if op_sel is
165  // already set.
166  unsigned Opcode = MI->getOpcode();
167  int OpNo = MI->getOperandNo(&Old);
168  int ModIdx = -1;
169  if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0))
170  ModIdx = AMDGPU::OpName::src0_modifiers;
171  else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1))
172  ModIdx = AMDGPU::OpName::src1_modifiers;
173  else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2))
174  ModIdx = AMDGPU::OpName::src2_modifiers;
175  assert(ModIdx != -1);
176  ModIdx = AMDGPU::getNamedOperandIdx(Opcode, ModIdx);
177  MachineOperand &Mod = MI->getOperand(ModIdx);
178  unsigned Val = Mod.getImm();
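 // Bail out unless op_sel/op_sel_hi are still in their default state
 // (op_sel clear, op_sel_hi set); otherwise folding a packed immediate
 // here would change which halves of the source are selected.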
179  if ((Val & SISrcMods::OP_SEL_0) || !(Val & SISrcMods::OP_SEL_1))
180  return false;
181  // If upper part is all zero we do not need op_sel_hi.
182  if (!isUInt<16>(Fold.ImmToFold)) {
183  if (!(Fold.ImmToFold & 0xffff)) {
184  Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0);
185  Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
186  Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff);
187  return true;
188  }
189  Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
190  }
191  }
192  Old.ChangeToImmediate(Fold.ImmToFold);
193  return true;
194  }
195 
196  if (Fold.isFI()) {
197  Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
198  return true;
199  }
200 
201  MachineOperand *New = Fold.OpToFold;
202  if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
203  TargetRegisterInfo::isVirtualRegister(New->getReg())) {
204  Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);
205 
206  Old.setIsUndef(New->isUndef());
207  return true;
208  }
209 
210  // FIXME: Handle physical registers.
211 
212  return false;
213 }
214 
215 static bool isUseMIInFoldList(ArrayRef<FoldCandidate> FoldList,
216  const MachineInstr *MI) {
217  for (auto Candidate : FoldList) {
218  if (Candidate.UseMI == MI)
219  return true;
220  }
221  return false;
222 }
223 
224 static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
225  MachineInstr *MI, unsigned OpNo,
226  MachineOperand *OpToFold,
227  const SIInstrInfo *TII) {
228  if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {
229 
230  // Special case for v_mac_{f16, f32}_e64 if we are trying to fold into src2
231  unsigned Opc = MI->getOpcode();
232  if ((Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
233  Opc == AMDGPU::V_FMAC_F32_e64) &&
234  (int)OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)) {
235  bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64;
236  bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64;
237  unsigned NewOpc = IsFMA ?
238  AMDGPU::V_FMA_F32 : (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
239 
240  // Check if changing this to a v_mad_{f16, f32} instruction will allow us
241  // to fold the operand.
242  MI->setDesc(TII->get(NewOpc));
243  bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold, TII);
244  if (FoldAsMAD) {
245  MI->untieRegOperand(OpNo);
246  return true;
247  }
248  MI->setDesc(TII->get(Opc));
249  }
250 
251  // Special case for s_setreg_b32
252  if (Opc == AMDGPU::S_SETREG_B32 && OpToFold->isImm()) {
253  MI->setDesc(TII->get(AMDGPU::S_SETREG_IMM32_B32));
254  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
255  return true;
256  }
257 
258  // If we are already folding into another operand of MI, then
259  // we can't commute the instruction, otherwise we risk making the
260  // other fold illegal.
261  if (isUseMIInFoldList(FoldList, MI))
262  return false;
263 
264  // Operand is not legal, so try to commute the instruction to
265  // see if this makes it possible to fold.
266  unsigned CommuteIdx0 = TargetInstrInfo::CommuteAnyOperandIndex;
267  unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
268  bool CanCommute = TII->findCommutedOpIndices(*MI, CommuteIdx0, CommuteIdx1);
269 
270  if (CanCommute) {
271  if (CommuteIdx0 == OpNo)
272  OpNo = CommuteIdx1;
273  else if (CommuteIdx1 == OpNo)
274  OpNo = CommuteIdx0;
275  }
276 
277  // One of operands might be an Imm operand, and OpNo may refer to it after
278  // the call of commuteInstruction() below. Such situations are avoided
279  // here explicitly as OpNo must be a register operand to be a candidate
280  // for memory folding.
281  if (CanCommute && (!MI->getOperand(CommuteIdx0).isReg() ||
282  !MI->getOperand(CommuteIdx1).isReg()))
283  return false;
284 
285  if (!CanCommute ||
286  !TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1))
287  return false;
288 
289  if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {
290  TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1);
291  return false;
292  }
293 
294  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold, true));
295  return true;
296  }
297 
298  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
299  return true;
300 }
301 
302 // If the use operand doesn't care about the value, this may be an operand only
303 // used for register indexing, in which case it is unsafe to fold.
304 static bool isUseSafeToFold(const SIInstrInfo *TII,
305  const MachineInstr &MI,
306  const MachineOperand &UseMO) {
307  return !UseMO.isUndef() && !TII->isSDWA(MI);
308  //return !MI.hasRegisterImplicitUseOperand(UseMO.getReg());
309 }
310 
311 void SIFoldOperands::foldOperand(
312  MachineOperand &OpToFold,
313  MachineInstr *UseMI,
314  unsigned UseOpIdx,
315  SmallVectorImpl<FoldCandidate> &FoldList,
316  SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
317  const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
318 
319  if (!isUseSafeToFold(TII, *UseMI, UseOp))
320  return;
321 
322  // FIXME: Fold operands with subregs.
323  if (UseOp.isReg() && OpToFold.isReg()) {
324  if (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister)
325  return;
326 
327  // Don't fold subregister extracts into tied operands, unless it is a full
328  // copy, since a subregister use tied to a full register def doesn't really
329  // make sense. e.g. don't fold:
330  //
331  // %1 = COPY %0:sub1
332  // %2<tied3> = V_MAC_{F16, F32} %3, %4, %1<tied0>
333  //
334  // into
335  // %2<tied3> = V_MAC_{F16, F32} %3, %4, %0:sub1<tied0>
336  if (UseOp.isTied() && OpToFold.getSubReg() != AMDGPU::NoSubRegister)
337  return;
338  }
339 
340  // Special case for REG_SEQUENCE: We can't fold literals into
341  // REG_SEQUENCE instructions, so we have to fold them into the
342  // uses of REG_SEQUENCE.
343  if (UseMI->isRegSequence()) {
344  unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
345  unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
346 
347  for (MachineRegisterInfo::use_iterator
348  RSUse = MRI->use_begin(RegSeqDstReg), RSE = MRI->use_end();
349  RSUse != RSE; ++RSUse) {
350 
351  MachineInstr *RSUseMI = RSUse->getParent();
352  if (RSUse->getSubReg() != RegSeqDstSubReg)
353  continue;
354 
355  foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
356  CopiesToReplace);
357  }
358 
359  return;
360  }
361 
362 
363  bool FoldingImm = OpToFold.isImm();
364 
365  // In order to fold immediates into copies, we need to change the
366  // copy to a MOV.
367  if (FoldingImm && UseMI->isCopy()) {
368  unsigned DestReg = UseMI->getOperand(0).getReg();
369  const TargetRegisterClass *DestRC
370  = TargetRegisterInfo::isVirtualRegister(DestReg) ?
371  MRI->getRegClass(DestReg) :
372  TRI->getPhysRegClass(DestReg);
373 
374  unsigned MovOp = TII->getMovOpcode(DestRC);
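 // getMovOpcode returns COPY when there is no single mov instruction for
 // this register class, in which case the immediate cannot be folded here.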
375  if (MovOp == AMDGPU::COPY)
376  return;
377 
378  UseMI->setDesc(TII->get(MovOp));
379  CopiesToReplace.push_back(UseMI);
380  } else {
381  const MCInstrDesc &UseDesc = UseMI->getDesc();
382 
383  // Don't fold into target independent nodes. Target independent opcodes
384  // don't have defined register classes.
385  if (UseDesc.isVariadic() ||
386  UseOp.isImplicit() ||
387  UseDesc.OpInfo[UseOpIdx].RegClass == -1)
388  return;
389  }
390 
391  if (!FoldingImm) {
392  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
393 
394  // FIXME: We could try to change the instruction from 64-bit to 32-bit
395  // to enable more folding opportunities. The shrink operands pass
396  // already does this.
397  return;
398  }
399 
400 
401  const MCInstrDesc &FoldDesc = OpToFold.getParent()->getDesc();
402  const TargetRegisterClass *FoldRC =
403  TRI->getRegClass(FoldDesc.OpInfo[0].RegClass);
404 
405 
406  // Split 64-bit constants into 32-bits for folding.
407  if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) {
408  unsigned UseReg = UseOp.getReg();
409  const TargetRegisterClass *UseRC
410  = TargetRegisterInfo::isVirtualRegister(UseReg) ?
411  MRI->getRegClass(UseReg) :
412  TRI->getPhysRegClass(UseReg);
413 
414  if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64)
415  return;
416 
417  APInt Imm(64, OpToFold.getImm());
418  if (UseOp.getSubReg() == AMDGPU::sub0) {
419  Imm = Imm.getLoBits(32);
420  } else {
421  assert(UseOp.getSubReg() == AMDGPU::sub1);
422  Imm = Imm.getHiBits(32);
423  }
424 
425  MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
426  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
427  return;
428  }
429 
430 
431 
432  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
433 }
434 
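// Constant fold a 32-bit bitwise or shift operation. Returns false for
// opcodes that are not handled here.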
435 static bool evalBinaryInstruction(unsigned Opcode, int32_t &Result,
436  uint32_t LHS, uint32_t RHS) {
437  switch (Opcode) {
438  case AMDGPU::V_AND_B32_e64:
439  case AMDGPU::V_AND_B32_e32:
440  case AMDGPU::S_AND_B32:
441  Result = LHS & RHS;
442  return true;
443  case AMDGPU::V_OR_B32_e64:
444  case AMDGPU::V_OR_B32_e32:
445  case AMDGPU::S_OR_B32:
446  Result = LHS | RHS;
447  return true;
448  case AMDGPU::V_XOR_B32_e64:
449  case AMDGPU::V_XOR_B32_e32:
450  case AMDGPU::S_XOR_B32:
451  Result = LHS ^ RHS;
452  return true;
453  case AMDGPU::V_LSHL_B32_e64:
454  case AMDGPU::V_LSHL_B32_e32:
455  case AMDGPU::S_LSHL_B32:
456  // The instruction ignores the high bits for out of bounds shifts.
457  Result = LHS << (RHS & 31);
458  return true;
459  case AMDGPU::V_LSHLREV_B32_e64:
460  case AMDGPU::V_LSHLREV_B32_e32:
461  Result = RHS << (LHS & 31);
462  return true;
463  case AMDGPU::V_LSHR_B32_e64:
464  case AMDGPU::V_LSHR_B32_e32:
465  case AMDGPU::S_LSHR_B32:
466  Result = LHS >> (RHS & 31);
467  return true;
468  case AMDGPU::V_LSHRREV_B32_e64:
469  case AMDGPU::V_LSHRREV_B32_e32:
470  Result = RHS >> (LHS & 31);
471  return true;
472  case AMDGPU::V_ASHR_I32_e64:
473  case AMDGPU::V_ASHR_I32_e32:
474  case AMDGPU::S_ASHR_I32:
475  Result = static_cast<int32_t>(LHS) >> (RHS & 31);
476  return true;
477  case AMDGPU::V_ASHRREV_I32_e64:
478  case AMDGPU::V_ASHRREV_I32_e32:
479  Result = static_cast<int32_t>(RHS) >> (LHS & 31);
480  return true;
481  default:
482  return false;
483  }
484 }
485 
486 static unsigned getMovOpc(bool IsScalar) {
487  return IsScalar ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
488 }
489 
490 /// Remove any leftover implicit operands from mutating the instruction. e.g.
491 /// if we replace an s_and_b32 with a copy, we don't need the implicit scc def
492 /// anymore.
493 static void stripExtraCopyOperands(MachineInstr &MI) {
494  const MCInstrDesc &Desc = MI.getDesc();
495  unsigned NumOps = Desc.getNumOperands() +
496  Desc.getNumImplicitUses() +
497  Desc.getNumImplicitDefs();
498 
499  for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
500  MI.RemoveOperand(I);
501 }
502 
503 static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) {
504  MI.setDesc(NewDesc);
505  stripExtraCopyOperands(MI);
506 }
507 
508 static MachineOperand *getImmOrMaterializedImm(MachineRegisterInfo &MRI,
509  MachineOperand &Op) {
510  if (Op.isReg()) {
511  // If this has a subregister, it obviously is a register source.
512  if (Op.getSubReg() != AMDGPU::NoSubRegister ||
513  !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
514  return &Op;
515 
516  MachineInstr *Def = MRI.getVRegDef(Op.getReg());
517  if (Def && Def->isMoveImmediate()) {
518  MachineOperand &ImmSrc = Def->getOperand(1);
519  if (ImmSrc.isImm())
520  return &ImmSrc;
521  }
522  }
523 
524  return &Op;
525 }
526 
527 // Try to simplify operations with a constant that may appear after instruction
528 // selection.
529 // TODO: See if a frame index with a fixed offset can fold.
530 static bool tryConstantFoldOp(MachineRegisterInfo &MRI,
531  const SIInstrInfo *TII,
532  MachineInstr *MI,
533  MachineOperand *ImmOp) {
534  unsigned Opc = MI->getOpcode();
535  if (Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
536  Opc == AMDGPU::S_NOT_B32) {
537  MI->getOperand(1).ChangeToImmediate(~ImmOp->getImm());
538  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_NOT_B32)));
539  return true;
540  }
541 
542  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
543  if (Src1Idx == -1)
544  return false;
545 
546  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
547  MachineOperand *Src0 = getImmOrMaterializedImm(MRI, MI->getOperand(Src0Idx));
548  MachineOperand *Src1 = getImmOrMaterializedImm(MRI, MI->getOperand(Src1Idx));
549 
550  if (!Src0->isImm() && !Src1->isImm())
551  return false;
552 
553  // and k0, k1 -> v_mov_b32 (k0 & k1)
554  // or k0, k1 -> v_mov_b32 (k0 | k1)
555  // xor k0, k1 -> v_mov_b32 (k0 ^ k1)
556  if (Src0->isImm() && Src1->isImm()) {
557  int32_t NewImm;
558  if (!evalBinaryInstruction(Opc, NewImm, Src0->getImm(), Src1->getImm()))
559  return false;
560 
561  const SIRegisterInfo &TRI = TII->getRegisterInfo();
562  bool IsSGPR = TRI.isSGPRReg(MRI, MI->getOperand(0).getReg());
563 
564  // Be careful to change the right operand, src0 may belong to a different
565  // instruction.
566  MI->getOperand(Src0Idx).ChangeToImmediate(NewImm);
567  MI->RemoveOperand(Src1Idx);
568  mutateCopyOp(*MI, TII->get(getMovOpc(IsSGPR)));
569  return true;
570  }
571 
572  if (!MI->isCommutable())
573  return false;
574 
575  if (Src0->isImm() && !Src1->isImm()) {
576  std::swap(Src0, Src1);
577  std::swap(Src0Idx, Src1Idx);
578  }
579 
580  int32_t Src1Val = static_cast<int32_t>(Src1->getImm());
581  if (Opc == AMDGPU::V_OR_B32_e64 ||
582  Opc == AMDGPU::V_OR_B32_e32 ||
583  Opc == AMDGPU::S_OR_B32) {
584  if (Src1Val == 0) {
585  // y = or x, 0 => y = copy x
586  MI->RemoveOperand(Src1Idx);
587  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
588  } else if (Src1Val == -1) {
589  // y = or x, -1 => y = v_mov_b32 -1
590  MI->RemoveOperand(Src1Idx);
591  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_OR_B32)));
592  } else
593  return false;
594 
595  return true;
596  }
597 
598  if (MI->getOpcode() == AMDGPU::V_AND_B32_e64 ||
599  MI->getOpcode() == AMDGPU::V_AND_B32_e32 ||
600  MI->getOpcode() == AMDGPU::S_AND_B32) {
601  if (Src1Val == 0) {
602  // y = and x, 0 => y = v_mov_b32 0
603  MI->RemoveOperand(Src0Idx);
604  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_AND_B32)));
605  } else if (Src1Val == -1) {
606  // y = and x, -1 => y = copy x
607  MI->RemoveOperand(Src1Idx);
608  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
609  stripExtraCopyOperands(*MI);
610  } else
611  return false;
612 
613  return true;
614  }
615 
616  if (MI->getOpcode() == AMDGPU::V_XOR_B32_e64 ||
617  MI->getOpcode() == AMDGPU::V_XOR_B32_e32 ||
618  MI->getOpcode() == AMDGPU::S_XOR_B32) {
619  if (Src1Val == 0) {
620  // y = xor x, 0 => y = copy x
621  MI->RemoveOperand(Src1Idx);
622  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
623  return true;
624  }
625  }
626 
627  return false;
628 }
629 
630 // Try to fold an instruction into a simpler one
631 static bool tryFoldInst(const SIInstrInfo *TII,
632  MachineInstr *MI) {
633  unsigned Opc = MI->getOpcode();
634 
635  if (Opc == AMDGPU::V_CNDMASK_B32_e32 ||
636  Opc == AMDGPU::V_CNDMASK_B32_e64 ||
637  Opc == AMDGPU::V_CNDMASK_B64_PSEUDO) {
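 // If both sources of the select are identical, the result does not depend
 // on the condition, so the cndmask can be replaced by a copy/mov of src0.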
638  const MachineOperand *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
639  const MachineOperand *Src1 = TII->getNamedOperand(*MI, AMDGPU::OpName::src1);
640  if (Src1->isIdenticalTo(*Src0)) {
641  LLVM_DEBUG(dbgs() << "Folded " << *MI << " into ");
642  int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
643  if (Src2Idx != -1)
644  MI->RemoveOperand(Src2Idx);
645  MI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1));
646  mutateCopyOp(*MI, TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY
647  : getMovOpc(false)));
648  LLVM_DEBUG(dbgs() << *MI << '\n');
649  return true;
650  }
651  }
652 
653  return false;
654 }
655 
656 void SIFoldOperands::foldInstOperand(MachineInstr &MI,
657  MachineOperand &OpToFold) const {
658  // We need to mutate the operands of new mov instructions to add implicit
659  // uses of EXEC, but adding them invalidates the use_iterator, so defer
660  // this.
661  SmallVector<MachineInstr *, 4> CopiesToReplace;
662  SmallVector<FoldCandidate, 4> FoldList;
663  MachineOperand &Dst = MI.getOperand(0);
664 
665  bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
666  if (FoldingImm) {
667  unsigned NumLiteralUses = 0;
668  MachineOperand *NonInlineUse = nullptr;
669  int NonInlineUseOpNo = -1;
670 
671  MachineRegisterInfo::use_iterator NextUse;
672  for (MachineRegisterInfo::use_iterator
673  Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
674  Use != E; Use = NextUse) {
675  NextUse = std::next(Use);
676  MachineInstr *UseMI = Use->getParent();
677  unsigned OpNo = Use.getOperandNo();
678 
679  // Folding the immediate may reveal operations that can be constant
680  // folded or replaced with a copy. This can happen for example after
681  // frame indices are lowered to constants or from splitting 64-bit
682  // constants.
683  //
684  // We may also encounter cases where one or both operands are
685  // immediates materialized into a register, which would ordinarily not
686  // be folded due to multiple uses or operand constraints.
687 
688  if (OpToFold.isImm() && tryConstantFoldOp(*MRI, TII, UseMI, &OpToFold)) {
689  LLVM_DEBUG(dbgs() << "Constant folded " << *UseMI << '\n');
690 
691  // Some constant folding cases change the same immediate's use to a new
692  // instruction, e.g. and x, 0 -> 0. Make sure we re-visit the user
693  // again. The same constant folded instruction could also have a second
694  // use operand.
695  NextUse = MRI->use_begin(Dst.getReg());
696  FoldList.clear();
697  continue;
698  }
699 
700  // Try to fold any inline immediate uses, and then only fold other
701  // constants if they have one use.
702  //
703  // The legality of the inline immediate must be checked based on the use
704  // operand, not the defining instruction, because 32-bit instructions
705  // with 32-bit inline immediate sources may be used to materialize
706  // constants used in 16-bit operands.
707  //
708  // e.g. it is unsafe to fold:
709  // s_mov_b32 s0, 1.0 // materializes 0x3f800000
710  // v_add_f16 v0, v1, s0 // 1.0 f16 inline immediate sees 0x00003c00
711 
712  // Folding immediates with more than one use will increase program size.
713  // FIXME: This will also reduce register usage, which may be better
714  // in some cases. A better heuristic is needed.
715  if (isInlineConstantIfFolded(TII, *UseMI, OpNo, OpToFold)) {
716  foldOperand(OpToFold, UseMI, OpNo, FoldList, CopiesToReplace);
717  } else {
718  if (++NumLiteralUses == 1) {
719  NonInlineUse = &*Use;
720  NonInlineUseOpNo = OpNo;
721  }
722  }
723  }
724 
725  if (NumLiteralUses == 1) {
726  MachineInstr *UseMI = NonInlineUse->getParent();
727  foldOperand(OpToFold, UseMI, NonInlineUseOpNo, FoldList, CopiesToReplace);
728  }
729  } else {
730  // Folding register.
731  for (MachineRegisterInfo::use_iterator
732  Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
733  Use != E; ++Use) {
734  MachineInstr *UseMI = Use->getParent();
735 
736  foldOperand(OpToFold, UseMI, Use.getOperandNo(),
737  FoldList, CopiesToReplace);
738  }
739  }
740 
741  MachineFunction *MF = MI.getParent()->getParent();
742  // Make sure we add EXEC uses to any new v_mov instructions created.
743  for (MachineInstr *Copy : CopiesToReplace)
744  Copy->addImplicitDefUseOperands(*MF);
745 
746  for (FoldCandidate &Fold : FoldList) {
747  if (updateOperand(Fold, *TRI)) {
748  // Clear kill flags.
749  if (Fold.isReg()) {
750  assert(Fold.OpToFold && Fold.OpToFold->isReg());
751  // FIXME: Probably shouldn't bother trying to fold if not an
752  // SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR
753  // copies.
754  MRI->clearKillFlags(Fold.OpToFold->getReg());
755  }
756  LLVM_DEBUG(dbgs() << "Folded source from " << MI << " into OpNo "
757  << static_cast<int>(Fold.UseOpNo) << " of "
758  << *Fold.UseMI << '\n');
759  tryFoldInst(TII, Fold.UseMI);
760  } else if (Fold.isCommuted()) {
761  // Restoring instruction's original operand order if fold has failed.
762  TII->commuteInstruction(*Fold.UseMI, false);
763  }
764  }
765 }
766 
767 // Clamp patterns are canonically selected to v_max_* instructions, so only
768 // handle them.
769 const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
770  unsigned Op = MI.getOpcode();
771  switch (Op) {
772  case AMDGPU::V_MAX_F32_e64:
773  case AMDGPU::V_MAX_F16_e64:
774  case AMDGPU::V_MAX_F64:
775  case AMDGPU::V_PK_MAX_F16: {
776  if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())
777  return nullptr;
778 
779  // Make sure sources are identical.
780  const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
781  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
782  if (!Src0->isReg() || !Src1->isReg() ||
783  Src0->getReg() != Src1->getReg() ||
784  Src0->getSubReg() != Src1->getSubReg() ||
785  Src0->getSubReg() != AMDGPU::NoSubRegister)
786  return nullptr;
787 
788  // Can't fold up if we have modifiers.
789  if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
790  return nullptr;
791 
792  unsigned Src0Mods
793  = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm();
794  unsigned Src1Mods
795  = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm();
796 
797  // Having a 0 op_sel_hi would require swizzling the output in the source
798  // instruction, which we can't do.
799  unsigned UnsetMods = (Op == AMDGPU::V_PK_MAX_F16) ? SISrcMods::OP_SEL_1 : 0;
800  if (Src0Mods != UnsetMods && Src1Mods != UnsetMods)
801  return nullptr;
802  return Src0;
803  }
804  default:
805  return nullptr;
806  }
807 }
808 
809 // We obviously have multiple uses in a clamp since the register is used twice
810 // in the same instruction.
811 static bool hasOneNonDBGUseInst(const MachineRegisterInfo &MRI, unsigned Reg) {
812  int Count = 0;
813  for (auto I = MRI.use_instr_nodbg_begin(Reg), E = MRI.use_instr_nodbg_end();
814  I != E; ++I) {
815  if (++Count > 1)
816  return false;
817  }
818 
819  return true;
820 }
821 
822 // FIXME: Clamp for v_mad_mixhi_f16 handled during isel.
823 bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) {
824  const MachineOperand *ClampSrc = isClamp(MI);
825  if (!ClampSrc || !hasOneNonDBGUseInst(*MRI, ClampSrc->getReg()))
826  return false;
827 
828  MachineInstr *Def = MRI->getVRegDef(ClampSrc->getReg());
829 
830  // The type of clamp must be compatible.
831  if (TII->getClampMask(*Def) != TII->getClampMask(MI))
832  return false;
833 
834  MachineOperand *DefClamp = TII->getNamedOperand(*Def, AMDGPU::OpName::clamp);
835  if (!DefClamp)
836  return false;
837 
838  LLVM_DEBUG(dbgs() << "Folding clamp " << *DefClamp << " into " << *Def
839  << '\n');
840 
841  // Clamp is applied after omod, so it is OK if omod is set.
842  DefClamp->setImm(1);
843  MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
844  MI.eraseFromParent();
845  return true;
846 }
847 
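// Map a multiplier immediate (0.5, 2.0 or 4.0, in f32 or f16 encoding) to the
// corresponding output-modifier value, or SIOutMods::NONE if it has no
// equivalent.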
848 static int getOModValue(unsigned Opc, int64_t Val) {
849  switch (Opc) {
850  case AMDGPU::V_MUL_F32_e64: {
851  switch (static_cast<uint32_t>(Val)) {
852  case 0x3f000000: // 0.5
853  return SIOutMods::DIV2;
854  case 0x40000000: // 2.0
855  return SIOutMods::MUL2;
856  case 0x40800000: // 4.0
857  return SIOutMods::MUL4;
858  default:
859  return SIOutMods::NONE;
860  }
861  }
862  case AMDGPU::V_MUL_F16_e64: {
863  switch (static_cast<uint16_t>(Val)) {
864  case 0x3800: // 0.5
865  return SIOutMods::DIV2;
866  case 0x4000: // 2.0
867  return SIOutMods::MUL2;
868  case 0x4400: // 4.0
869  return SIOutMods::MUL4;
870  default:
871  return SIOutMods::NONE;
872  }
873  }
874  default:
875  llvm_unreachable("invalid mul opcode");
876  }
877 }
878 
879 // FIXME: Does this really not support denormals with f16?
880 // FIXME: Does this need to check IEEE mode bit? SNaNs are generally not
881 // handled, so will anything other than that break?
882 std::pair<const MachineOperand *, int>
883 SIFoldOperands::isOMod(const MachineInstr &MI) const {
884  unsigned Op = MI.getOpcode();
885  switch (Op) {
886  case AMDGPU::V_MUL_F32_e64:
887  case AMDGPU::V_MUL_F16_e64: {
888  // If output denormals are enabled, omod is ignored.
889  if ((Op == AMDGPU::V_MUL_F32_e64 && ST->hasFP32Denormals()) ||
890  (Op == AMDGPU::V_MUL_F16_e64 && ST->hasFP16Denormals()))
891  return std::make_pair(nullptr, SIOutMods::NONE);
892 
893  const MachineOperand *RegOp = nullptr;
894  const MachineOperand *ImmOp = nullptr;
895  const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
896  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
897  if (Src0->isImm()) {
898  ImmOp = Src0;
899  RegOp = Src1;
900  } else if (Src1->isImm()) {
901  ImmOp = Src1;
902  RegOp = Src0;
903  } else
904  return std::make_pair(nullptr, SIOutMods::NONE);
905 
906  int OMod = getOModValue(Op, ImmOp->getImm());
907  if (OMod == SIOutMods::NONE ||
908  TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
909  TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
910  TII->hasModifiersSet(MI, AMDGPU::OpName::omod) ||
911  TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))
912  return std::make_pair(nullptr, SIOutMods::NONE);
913 
914  return std::make_pair(RegOp, OMod);
915  }
916  case AMDGPU::V_ADD_F32_e64:
917  case AMDGPU::V_ADD_F16_e64: {
918  // If output denormals are enabled, omod is ignored.
919  if ((Op == AMDGPU::V_ADD_F32_e64 && ST->hasFP32Denormals()) ||
920  (Op == AMDGPU::V_ADD_F16_e64 && ST->hasFP16Denormals()))
921  return std::make_pair(nullptr, SIOutMods::NONE);
922 
923  // Look through the DAGCombiner canonicalization fmul x, 2 -> fadd x, x
924  const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
925  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
926 
927  if (Src0->isReg() && Src1->isReg() && Src0->getReg() == Src1->getReg() &&
928  Src0->getSubReg() == Src1->getSubReg() &&
929  !TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) &&
930  !TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) &&
931  !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) &&
932  !TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
933  return std::make_pair(Src0, SIOutMods::MUL2);
934 
935  return std::make_pair(nullptr, SIOutMods::NONE);
936  }
937  default:
938  return std::make_pair(nullptr, SIOutMods::NONE);
939  }
940 }
941 
942 // FIXME: Does this need to check IEEE bit on function?
943 bool SIFoldOperands::tryFoldOMod(MachineInstr &MI) {
944  const MachineOperand *RegOp;
945  int OMod;
946  std::tie(RegOp, OMod) = isOMod(MI);
947  if (OMod == SIOutMods::NONE || !RegOp->isReg() ||
948  RegOp->getSubReg() != AMDGPU::NoSubRegister ||
949  !hasOneNonDBGUseInst(*MRI, RegOp->getReg()))
950  return false;
951 
952  MachineInstr *Def = MRI->getVRegDef(RegOp->getReg());
953  MachineOperand *DefOMod = TII->getNamedOperand(*Def, AMDGPU::OpName::omod);
954  if (!DefOMod || DefOMod->getImm() != SIOutMods::NONE)
955  return false;
956 
957  // Clamp is applied after omod. If the source already has clamp set, don't
958  // fold it.
959  if (TII->hasModifiersSet(*Def, AMDGPU::OpName::clamp))
960  return false;
961 
962  LLVM_DEBUG(dbgs() << "Folding omod " << MI << " into " << *Def << '\n');
963 
964  DefOMod->setImm(OMod);
965  MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
966  MI.eraseFromParent();
967  return true;
968 }
969 
970 bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
971  if (skipFunction(MF.getFunction()))
972  return false;
973 
974  MRI = &MF.getRegInfo();
975  ST = &MF.getSubtarget<GCNSubtarget>();
976  TII = ST->getInstrInfo();
977  TRI = &TII->getRegisterInfo();
978 
979  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
980 
981  // omod is ignored by hardware if IEEE bit is enabled. omod also does not
982  // correctly handle signed zeros.
983  //
984  // TODO: Check nsz on instructions when fast math flags are preserved to MI
985  // level.
986  bool IsIEEEMode = ST->enableIEEEBit(MF) || !MFI->hasNoSignedZerosFPMath();
987 
988  for (MachineBasicBlock *MBB : depth_first(&MF)) {
989  MachineBasicBlock::iterator I, Next;
990  for (I = MBB->begin(); I != MBB->end(); I = Next) {
991  Next = std::next(I);
992  MachineInstr &MI = *I;
993 
994  tryFoldInst(TII, &MI);
995 
996  if (!TII->isFoldableCopy(MI)) {
997  if (IsIEEEMode || !tryFoldOMod(MI))
998  tryFoldClamp(MI);
999  continue;
1000  }
1001 
1002  MachineOperand &OpToFold = MI.getOperand(1);
1003  bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
1004 
1005  // FIXME: We could also be folding things like TargetIndexes.
1006  if (!FoldingImm && !OpToFold.isReg())
1007  continue;
1008 
1009  if (OpToFold.isReg() &&
1010  !TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()))
1011  continue;
1012 
1013  // Prevent folding operands backwards in the function. For example,
1014  // the COPY opcode must not be replaced by 1 in this example:
1015  //
1016  // %3 = COPY %vgpr0; VGPR_32:%3
1017  // ...
1018  // %vgpr0 = V_MOV_B32_e32 1, implicit %exec
1019  MachineOperand &Dst = MI.getOperand(0);
1020  if (Dst.isReg() &&
1021  !TargetRegisterInfo::isVirtualRegister(Dst.getReg()))
1022  continue;
1023 
1024  foldInstOperand(MI, OpToFold);
1025  }
1026  }
1027  return false;
1028 }