LLVM  9.0.0svn
SIFoldOperands.cpp
1 //===-- SIFoldOperands.cpp - Fold operands --- ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 /// \file
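/// (Editorial summary, not part of the original header.) This pass folds
/// immediates, frame indices, and registers fed by foldable copies directly
/// into their uses, and also folds clamp and output-modifier (omod) patterns
/// back into the instructions that define their sources.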
8 //===----------------------------------------------------------------------===//
9 //
10 
11 #include "AMDGPU.h"
12 #include "AMDGPUSubtarget.h"
13 #include "SIInstrInfo.h"
14 #include "SIMachineFunctionInfo.h"
15 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
16 #include "llvm/ADT/DepthFirstIterator.h"
17 #include "llvm/CodeGen/LiveIntervals.h"
18 #include "llvm/CodeGen/MachineFunctionPass.h"
19 #include "llvm/CodeGen/MachineInstrBuilder.h"
20 #include "llvm/CodeGen/MachineRegisterInfo.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/raw_ostream.h"
23 #include "llvm/Target/TargetMachine.h"
24 
25 #define DEBUG_TYPE "si-fold-operands"
26 using namespace llvm;
27 
28 namespace {
29 
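// (Editorial note) FoldCandidate describes one pending fold: the use
// instruction and operand number to rewrite, plus the value being folded,
// which is an immediate, a frame index, or a register operand depending on
// Kind. Commuted and ShrinkOpcode record extra work needed to make the fold legal.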
30 struct FoldCandidate {
31  MachineInstr *UseMI;
32  union {
33  MachineOperand *OpToFold;
34  uint64_t ImmToFold;
35  int FrameIndexToFold;
36  };
37  int ShrinkOpcode;
38  unsigned char UseOpNo;
39  MachineOperand::MachineOperandType Kind;
40  bool Commuted;
41 
42  FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp,
43  bool Commuted_ = false,
44  int ShrinkOp = -1) :
45  UseMI(MI), OpToFold(nullptr), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
46  Kind(FoldOp->getType()),
47  Commuted(Commuted_) {
48  if (FoldOp->isImm()) {
49  ImmToFold = FoldOp->getImm();
50  } else if (FoldOp->isFI()) {
51  FrameIndexToFold = FoldOp->getIndex();
52  } else {
53  assert(FoldOp->isReg());
54  OpToFold = FoldOp;
55  }
56  }
57 
58  bool isFI() const {
59  return Kind == MachineOperand::MO_FrameIndex;
60  }
61 
62  bool isImm() const {
63  return Kind == MachineOperand::MO_Immediate;
64  }
65 
66  bool isReg() const {
67  return Kind == MachineOperand::MO_Register;
68  }
69 
70  bool isCommuted() const {
71  return Commuted;
72  }
73 
74  bool needsShrink() const {
75  return ShrinkOpcode != -1;
76  }
77 
78  int getShrinkOpcode() const {
79  return ShrinkOpcode;
80  }
81 };
82 
83 class SIFoldOperands : public MachineFunctionPass {
84 public:
85  static char ID;
86  MachineRegisterInfo *MRI;
87  const SIInstrInfo *TII;
88  const SIRegisterInfo *TRI;
89  const GCNSubtarget *ST;
90 
91  void foldOperand(MachineOperand &OpToFold,
92  MachineInstr *UseMI,
93  unsigned UseOpIdx,
94  SmallVectorImpl<FoldCandidate> &FoldList,
95  SmallVectorImpl<MachineInstr *> &CopiesToReplace) const;
96 
97  void foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const;
98 
99  const MachineOperand *isClamp(const MachineInstr &MI) const;
100  bool tryFoldClamp(MachineInstr &MI);
101 
102  std::pair<const MachineOperand *, int> isOMod(const MachineInstr &MI) const;
103  bool tryFoldOMod(MachineInstr &MI);
104 
105 public:
106  SIFoldOperands() : MachineFunctionPass(ID) {
107  initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
108  }
109 
110  bool runOnMachineFunction(MachineFunction &MF) override;
111 
112  StringRef getPassName() const override { return "SI Fold Operands"; }
113 
114  void getAnalysisUsage(AnalysisUsage &AU) const override {
115  AU.setPreservesCFG();
116  MachineFunctionPass::getAnalysisUsage(AU);
117  }
118 };
119 
120 } // End anonymous namespace.
121 
122 INITIALIZE_PASS(SIFoldOperands, DEBUG_TYPE,
123  "SI Fold Operands", false, false)
124 
125 char SIFoldOperands::ID = 0;
126 
127 char &llvm::SIFoldOperandsID = SIFoldOperands::ID;
128 
129 // Wrapper around isInlineConstant that understands special cases when
130 // instruction types are replaced during operand folding.
131 static bool isInlineConstantIfFolded(const SIInstrInfo *TII,
132  const MachineInstr &UseMI,
133  unsigned OpNo,
134  const MachineOperand &OpToFold) {
135  if (TII->isInlineConstant(UseMI, OpNo, OpToFold))
136  return true;
137 
138  unsigned Opc = UseMI.getOpcode();
139  switch (Opc) {
140  case AMDGPU::V_MAC_F32_e64:
141  case AMDGPU::V_MAC_F16_e64:
142  case AMDGPU::V_FMAC_F32_e64: {
143  // Special case for mac. Since this is replaced with mad when folded into
144  // src2, we need to check the legality for the final instruction.
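 // Illustrative example (not from the original source): folding an immediate
 // K into src2 of "v_mac_f32 d, a, b" effectively forms "v_mad_f32 d, a, b, K",
 // so K must be inline-legal for the MAD/FMA encoding, not for the MAC one.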
145  int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
146  if (static_cast<int>(OpNo) == Src2Idx) {
147  bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64;
148  bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64;
149 
150  unsigned Opc = IsFMA ?
151  AMDGPU::V_FMA_F32 : (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
152  const MCInstrDesc &MadDesc = TII->get(Opc);
153  return TII->isInlineConstant(OpToFold, MadDesc.OpInfo[OpNo].OperandType);
154  }
155  return false;
156  }
157  default:
158  return false;
159  }
160 }
161 
162 FunctionPass *llvm::createSIFoldOperandsPass() {
163  return new SIFoldOperands();
164 }
165 
166 static bool updateOperand(FoldCandidate &Fold,
167  const SIInstrInfo &TII,
168  const TargetRegisterInfo &TRI) {
169  MachineInstr *MI = Fold.UseMI;
170  MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
171  assert(Old.isReg());
172 
173  if (Fold.isImm()) {
174  if (MI->getDesc().TSFlags & SIInstrFlags::IsPacked) {
175  // Set op_sel/op_sel_hi on this operand or bail out if op_sel is
176  // already set.
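 // Illustrative note: for packed (V_PK_*) instructions the folded 32-bit
 // literal carries two 16-bit halves, and op_sel/op_sel_hi select which half
 // is read; the code below rewrites those bits when only one half is needed.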
177  unsigned Opcode = MI->getOpcode();
178  int OpNo = MI->getOperandNo(&Old);
179  int ModIdx = -1;
180  if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0))
181  ModIdx = AMDGPU::OpName::src0_modifiers;
182  else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1))
183  ModIdx = AMDGPU::OpName::src1_modifiers;
184  else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2))
185  ModIdx = AMDGPU::OpName::src2_modifiers;
186  assert(ModIdx != -1);
187  ModIdx = AMDGPU::getNamedOperandIdx(Opcode, ModIdx);
188  MachineOperand &Mod = MI->getOperand(ModIdx);
189  unsigned Val = Mod.getImm();
190  if ((Val & SISrcMods::OP_SEL_0) || !(Val & SISrcMods::OP_SEL_1))
191  return false;
192  // If upper part is all zero we do not need op_sel_hi.
193  if (!isUInt<16>(Fold.ImmToFold)) {
194  if (!(Fold.ImmToFold & 0xffff)) {
195  Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0);
196  Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
197  Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff);
198  return true;
199  }
200  Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
201  }
202  }
203 
204  if (Fold.needsShrink()) {
205  MachineBasicBlock *MBB = MI->getParent();
206  auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI);
207  if (Liveness != MachineBasicBlock::LQR_Dead)
208  return false;
209 
210  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
211  int Op32 = Fold.getShrinkOpcode();
212  MachineOperand &Dst0 = MI->getOperand(0);
213  MachineOperand &Dst1 = MI->getOperand(1);
214  assert(Dst0.isDef() && Dst1.isDef());
215 
216  bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg());
217 
218  const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
219  unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
220  const TargetRegisterClass *Dst1RC = MRI.getRegClass(Dst1.getReg());
221  unsigned NewReg1 = MRI.createVirtualRegister(Dst1RC);
222 
223  MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
224 
225  if (HaveNonDbgCarryUse) {
226  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg())
227  .addReg(AMDGPU::VCC, RegState::Kill);
228  }
229 
230  // Keep the old instruction around to avoid breaking iterators, but
231  // replace the outputs with dummy registers.
232  Dst0.setReg(NewReg0);
233  Dst1.setReg(NewReg1);
234 
235  if (Fold.isCommuted())
236  TII.commuteInstruction(*Inst32, false);
237  return true;
238  }
239 
240  Old.ChangeToImmediate(Fold.ImmToFold);
241  return true;
242  }
243 
244  assert(!Fold.needsShrink() && "not handled");
245 
246  if (Fold.isFI()) {
247  Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
248  return true;
249  }
250 
251  MachineOperand *New = Fold.OpToFold;
252  if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
253  TargetRegisterInfo::isVirtualRegister(New->getReg())) {
254  Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);
255 
256  Old.setIsUndef(New->isUndef());
257  return true;
258  }
259 
260  // FIXME: Handle physical registers.
261 
262  return false;
263 }
264 
265 static bool isUseMIInFoldList(ArrayRef<FoldCandidate> FoldList,
266  const MachineInstr *MI) {
267  for (auto Candidate : FoldList) {
268  if (Candidate.UseMI == MI)
269  return true;
270  }
271  return false;
272 }
273 
274 static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
275  MachineInstr *MI, unsigned OpNo,
276  MachineOperand *OpToFold,
277  const SIInstrInfo *TII) {
278  if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {
279 
280  // Special case for v_mac_{f16, f32}_e64 if we are trying to fold into src2
281  unsigned Opc = MI->getOpcode();
282  if ((Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
283  Opc == AMDGPU::V_FMAC_F32_e64) &&
284  (int)OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)) {
285  bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64;
286  bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64;
287  unsigned NewOpc = IsFMA ?
288  AMDGPU::V_FMA_F32 : (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
289 
290  // Check if changing this to a v_mad_{f16, f32} instruction will allow us
291  // to fold the operand.
292  MI->setDesc(TII->get(NewOpc));
293  bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold, TII);
294  if (FoldAsMAD) {
295  MI->untieRegOperand(OpNo);
296  return true;
297  }
298  MI->setDesc(TII->get(Opc));
299  }
300 
301  // Special case for s_setreg_b32
302  if (Opc == AMDGPU::S_SETREG_B32 && OpToFold->isImm()) {
303  MI->setDesc(TII->get(AMDGPU::S_SETREG_IMM32_B32));
304  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
305  return true;
306  }
307 
308  // If we are already folding into another operand of MI, then
309  // we can't commute the instruction, otherwise we risk making the
310  // other fold illegal.
311  if (isUseMIInFoldList(FoldList, MI))
312  return false;
313 
314  unsigned CommuteOpNo = OpNo;
315 
316  // Operand is not legal, so try to commute the instruction to
317  // see if this makes it possible to fold.
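 // Illustrative note: many VOP encodings accept a literal or inline constant
 // only in src0, so swapping the operands can turn an illegal immediate
 // placement into a legal one.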
318  unsigned CommuteIdx0 = TargetInstrInfo::CommuteAnyOperandIndex;
319  unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
320  bool CanCommute = TII->findCommutedOpIndices(*MI, CommuteIdx0, CommuteIdx1);
321 
322  if (CanCommute) {
323  if (CommuteIdx0 == OpNo)
324  CommuteOpNo = CommuteIdx1;
325  else if (CommuteIdx1 == OpNo)
326  CommuteOpNo = CommuteIdx0;
327  }
328 
329 
330  // One of operands might be an Imm operand, and OpNo may refer to it after
331  // the call of commuteInstruction() below. Such situations are avoided
332  // here explicitly as OpNo must be a register operand to be a candidate
333  // for memory folding.
334  if (CanCommute && (!MI->getOperand(CommuteIdx0).isReg() ||
335  !MI->getOperand(CommuteIdx1).isReg()))
336  return false;
337 
338  if (!CanCommute ||
339  !TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1))
340  return false;
341 
342  if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) {
343  if ((Opc == AMDGPU::V_ADD_I32_e64 ||
344  Opc == AMDGPU::V_SUB_I32_e64 ||
345  Opc == AMDGPU::V_SUBREV_I32_e64) && // FIXME
346  OpToFold->isImm()) {
347  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
348 
349  // Verify the other operand is a VGPR, otherwise we would violate the
350  // constant bus restriction.
351  unsigned OtherIdx = CommuteOpNo == CommuteIdx0 ? CommuteIdx1 : CommuteIdx0;
352  MachineOperand &OtherOp = MI->getOperand(OtherIdx);
353  if (!OtherOp.isReg() ||
354  !TII->getRegisterInfo().isVGPR(MRI, OtherOp.getReg()))
355  return false;
356 
357  assert(MI->getOperand(1).isDef());
358 
359  int Op32 = AMDGPU::getVOPe32(Opc);
360  FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true,
361  Op32));
362  return true;
363  }
364 
365  TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1);
366  return false;
367  }
368 
369  FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true));
370  return true;
371  }
372 
373  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
374  return true;
375 }
376 
377 // If the use operand doesn't care about the value, this may be an operand only
378 // used for register indexing, in which case it is unsafe to fold.
379 static bool isUseSafeToFold(const SIInstrInfo *TII,
380  const MachineInstr &MI,
381  const MachineOperand &UseMO) {
382  return !UseMO.isUndef() && !TII->isSDWA(MI);
383  //return !MI.hasRegisterImplicitUseOperand(UseMO.getReg());
384 }
385 
386 void SIFoldOperands::foldOperand(
387  MachineOperand &OpToFold,
388  MachineInstr *UseMI,
389  unsigned UseOpIdx,
390  SmallVectorImpl<FoldCandidate> &FoldList,
391  SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
392  const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
393 
394  if (!isUseSafeToFold(TII, *UseMI, UseOp))
395  return;
396 
397  // FIXME: Fold operands with subregs.
398  if (UseOp.isReg() && OpToFold.isReg()) {
399  if (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister)
400  return;
401 
402  // Don't fold subregister extracts into tied operands unless this is a full
403  // copy, since a subregister use tied to a full register def doesn't really
404  // make sense. e.g. don't fold:
405  //
406  // %1 = COPY %0:sub1
407  // %2<tied3> = V_MAC_{F16, F32} %3, %4, %1<tied0>
408  //
409  // into
410  // %2<tied3> = V_MAC_{F16, F32} %3, %4, %0:sub1<tied0>
411  if (UseOp.isTied() && OpToFold.getSubReg() != AMDGPU::NoSubRegister)
412  return;
413  }
414 
415  // Special case for REG_SEQUENCE: We can't fold literals into
416  // REG_SEQUENCE instructions, so we have to fold them into the
417  // uses of REG_SEQUENCE.
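 // Illustrative sketch (hypothetical MIR):
 //   %vec = REG_SEQUENCE %k, %subreg.sub0, %x, %subreg.sub1
 //   %use = ... %vec.sub0 ...
 // A literal defining %k is folded into the users of %vec.sub0 rather than
 // into the REG_SEQUENCE itself.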
418  if (UseMI->isRegSequence()) {
419  unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
420  unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
421 
422  for (MachineRegisterInfo::use_iterator
423  RSUse = MRI->use_begin(RegSeqDstReg), RSE = MRI->use_end();
424  RSUse != RSE; ++RSUse) {
425 
426  MachineInstr *RSUseMI = RSUse->getParent();
427  if (RSUse->getSubReg() != RegSeqDstSubReg)
428  continue;
429 
430  foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
431  CopiesToReplace);
432  }
433 
434  return;
435  }
436 
437 
438  bool FoldingImm = OpToFold.isImm();
439 
440  if (FoldingImm && UseMI->isCopy()) {
441  unsigned DestReg = UseMI->getOperand(0).getReg();
442  const TargetRegisterClass *DestRC
443  = TargetRegisterInfo::isVirtualRegister(DestReg) ?
444  MRI->getRegClass(DestReg) :
445  TRI->getPhysRegClass(DestReg);
446 
447  unsigned SrcReg = UseMI->getOperand(1).getReg();
448  if (TargetRegisterInfo::isVirtualRegister(DestReg) &&
449  TargetRegisterInfo::isVirtualRegister(SrcReg)) {
450  const TargetRegisterClass * SrcRC = MRI->getRegClass(SrcReg);
451  if (TRI->isSGPRClass(SrcRC) && TRI->hasVGPRs(DestRC)) {
452  MachineRegisterInfo::use_iterator NextUse;
453  SmallVector<FoldCandidate, 4> CopyUses;
454  for (MachineRegisterInfo::use_iterator
455  Use = MRI->use_begin(DestReg), E = MRI->use_end();
456  Use != E; Use = NextUse) {
457  NextUse = std::next(Use);
458  FoldCandidate FC = FoldCandidate(Use->getParent(),
459  Use.getOperandNo(), &UseMI->getOperand(1));
460  CopyUses.push_back(FC);
461  }
462  for (auto & F : CopyUses) {
463  foldOperand(*F.OpToFold, F.UseMI, F.UseOpNo,
464  FoldList, CopiesToReplace);
465  }
466  }
467  }
468 
469  // In order to fold immediates into copies, we need to change the
470  // copy to a MOV.
471 
472  unsigned MovOp = TII->getMovOpcode(DestRC);
473  if (MovOp == AMDGPU::COPY)
474  return;
475 
476  UseMI->setDesc(TII->get(MovOp));
477  CopiesToReplace.push_back(UseMI);
478  } else {
479  if (UseMI->isCopy() && OpToFold.isReg() &&
480  TargetRegisterInfo::isVirtualRegister(UseMI->getOperand(0).getReg()) &&
481  TargetRegisterInfo::isVirtualRegister(UseMI->getOperand(1).getReg()) &&
482  TRI->isVGPR(*MRI, UseMI->getOperand(0).getReg()) &&
483  TRI->isVGPR(*MRI, UseMI->getOperand(1).getReg()) &&
484  !UseMI->getOperand(1).getSubReg()) {
485  UseMI->getOperand(1).setReg(OpToFold.getReg());
486  UseMI->getOperand(1).setSubReg(OpToFold.getSubReg());
487  UseMI->getOperand(1).setIsKill(false);
488  CopiesToReplace.push_back(UseMI);
489  OpToFold.setIsKill(false);
490  return;
491  }
492 
493  const MCInstrDesc &UseDesc = UseMI->getDesc();
494 
495  // Don't fold into target independent nodes. Target independent opcodes
496  // don't have defined register classes.
497  if (UseDesc.isVariadic() ||
498  UseOp.isImplicit() ||
499  UseDesc.OpInfo[UseOpIdx].RegClass == -1)
500  return;
501  }
502 
503  if (!FoldingImm) {
504  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
505 
506  // FIXME: We could try to change the instruction from 64-bit to 32-bit
507  // to enable more folding opportunities. The shrink operands pass
508  // already does this.
509  return;
510  }
511 
512 
513  const MCInstrDesc &FoldDesc = OpToFold.getParent()->getDesc();
514  const TargetRegisterClass *FoldRC =
515  TRI->getRegClass(FoldDesc.OpInfo[0].RegClass);
516 
517 
518  // Split 64-bit constants into 32-bits for folding.
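 // Illustrative sketch (hypothetical values): for
 //   %k:sreg_64 = S_MOV_B64 0x3FF0000000000000
 // a use of %k.sub1 receives only the high half, 0x3FF00000, and a use of
 // %k.sub0 receives the low half, 0x00000000.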
519  if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) {
520  unsigned UseReg = UseOp.getReg();
521  const TargetRegisterClass *UseRC
522  = TargetRegisterInfo::isVirtualRegister(UseReg) ?
523  MRI->getRegClass(UseReg) :
524  TRI->getPhysRegClass(UseReg);
525 
526  if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64)
527  return;
528 
529  APInt Imm(64, OpToFold.getImm());
530  if (UseOp.getSubReg() == AMDGPU::sub0) {
531  Imm = Imm.getLoBits(32);
532  } else {
533  assert(UseOp.getSubReg() == AMDGPU::sub1);
534  Imm = Imm.getHiBits(32);
535  }
536 
537  MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
538  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
539  return;
540  }
541 
542 
543 
544  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
545 }
546 
547 static bool evalBinaryInstruction(unsigned Opcode, int32_t &Result,
548  uint32_t LHS, uint32_t RHS) {
549  switch (Opcode) {
550  case AMDGPU::V_AND_B32_e64:
551  case AMDGPU::V_AND_B32_e32:
552  case AMDGPU::S_AND_B32:
553  Result = LHS & RHS;
554  return true;
555  case AMDGPU::V_OR_B32_e64:
556  case AMDGPU::V_OR_B32_e32:
557  case AMDGPU::S_OR_B32:
558  Result = LHS | RHS;
559  return true;
560  case AMDGPU::V_XOR_B32_e64:
561  case AMDGPU::V_XOR_B32_e32:
562  case AMDGPU::S_XOR_B32:
563  Result = LHS ^ RHS;
564  return true;
565  case AMDGPU::V_LSHL_B32_e64:
566  case AMDGPU::V_LSHL_B32_e32:
567  case AMDGPU::S_LSHL_B32:
568  // The instruction ignores the high bits for out of bounds shifts.
569  Result = LHS << (RHS & 31);
570  return true;
571  case AMDGPU::V_LSHLREV_B32_e64:
572  case AMDGPU::V_LSHLREV_B32_e32:
573  Result = RHS << (LHS & 31);
574  return true;
575  case AMDGPU::V_LSHR_B32_e64:
576  case AMDGPU::V_LSHR_B32_e32:
577  case AMDGPU::S_LSHR_B32:
578  Result = LHS >> (RHS & 31);
579  return true;
580  case AMDGPU::V_LSHRREV_B32_e64:
581  case AMDGPU::V_LSHRREV_B32_e32:
582  Result = RHS >> (LHS & 31);
583  return true;
584  case AMDGPU::V_ASHR_I32_e64:
585  case AMDGPU::V_ASHR_I32_e32:
586  case AMDGPU::S_ASHR_I32:
587  Result = static_cast<int32_t>(LHS) >> (RHS & 31);
588  return true;
589  case AMDGPU::V_ASHRREV_I32_e64:
590  case AMDGPU::V_ASHRREV_I32_e32:
591  Result = static_cast<int32_t>(RHS) >> (LHS & 31);
592  return true;
593  default:
594  return false;
595  }
596 }
597 
598 static unsigned getMovOpc(bool IsScalar) {
599  return IsScalar ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
600 }
601 
602 /// Remove any leftover implicit operands from mutating the instruction. e.g.
603 /// if we replace an s_and_b32 with a copy, we don't need the implicit scc def
604 /// anymore.
605 static void stripExtraCopyOperands(MachineInstr &MI) {
606  const MCInstrDesc &Desc = MI.getDesc();
607  unsigned NumOps = Desc.getNumOperands() +
608  Desc.getNumImplicitUses() +
609  Desc.getNumImplicitDefs();
610 
611  for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
612  MI.RemoveOperand(I);
613 }
614 
615 static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) {
616  MI.setDesc(NewDesc);
617  stripExtraCopyOperands(MI);
618 }
619 
620 static MachineOperand *getImmOrMaterializedImm(MachineRegisterInfo &MRI,
621  MachineOperand &Op) {
622  if (Op.isReg()) {
623  // If this has a subregister, it obviously is a register source.
624  if (Op.getSubReg() != AMDGPU::NoSubRegister ||
625  !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
626  return &Op;
627 
628  MachineInstr *Def = MRI.getVRegDef(Op.getReg());
629  if (Def && Def->isMoveImmediate()) {
630  MachineOperand &ImmSrc = Def->getOperand(1);
631  if (ImmSrc.isImm())
632  return &ImmSrc;
633  }
634  }
635 
636  return &Op;
637 }
638 
639 // Try to simplify operations with a constant that may appear after instruction
640 // selection.
641 // TODO: See if a frame index with a fixed offset can fold.
642 static bool tryConstantFoldOp(MachineRegisterInfo &MRI,
643  const SIInstrInfo *TII,
644  MachineInstr *MI,
645  MachineOperand *ImmOp) {
646  unsigned Opc = MI->getOpcode();
647  if (Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
648  Opc == AMDGPU::S_NOT_B32) {
649  MI->getOperand(1).ChangeToImmediate(~ImmOp->getImm());
650  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_NOT_B32)));
651  return true;
652  }
653 
654  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
655  if (Src1Idx == -1)
656  return false;
657 
658  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
659  MachineOperand *Src0 = getImmOrMaterializedImm(MRI, MI->getOperand(Src0Idx));
660  MachineOperand *Src1 = getImmOrMaterializedImm(MRI, MI->getOperand(Src1Idx));
661 
662  if (!Src0->isImm() && !Src1->isImm())
663  return false;
664 
665  if (MI->getOpcode() == AMDGPU::V_LSHL_OR_B32) {
666  if (Src0->isImm() && Src0->getImm() == 0) {
667  // v_lshl_or_b32 0, X, Y -> copy Y
668  // v_lshl_or_b32 0, X, K -> v_mov_b32 K
669  bool UseCopy = TII->getNamedOperand(*MI, AMDGPU::OpName::src2)->isReg();
670  MI->RemoveOperand(Src1Idx);
671  MI->RemoveOperand(Src0Idx);
672 
673  MI->setDesc(TII->get(UseCopy ? AMDGPU::COPY : AMDGPU::V_MOV_B32_e32));
674  return true;
675  }
676  }
677 
678  // and k0, k1 -> v_mov_b32 (k0 & k1)
679  // or k0, k1 -> v_mov_b32 (k0 | k1)
680  // xor k0, k1 -> v_mov_b32 (k0 ^ k1)
681  if (Src0->isImm() && Src1->isImm()) {
682  int32_t NewImm;
683  if (!evalBinaryInstruction(Opc, NewImm, Src0->getImm(), Src1->getImm()))
684  return false;
685 
686  const SIRegisterInfo &TRI = TII->getRegisterInfo();
687  bool IsSGPR = TRI.isSGPRReg(MRI, MI->getOperand(0).getReg());
688 
689  // Be careful to change the right operand, src0 may belong to a different
690  // instruction.
691  MI->getOperand(Src0Idx).ChangeToImmediate(NewImm);
692  MI->RemoveOperand(Src1Idx);
693  mutateCopyOp(*MI, TII->get(getMovOpc(IsSGPR)));
694  return true;
695  }
696 
697  if (!MI->isCommutable())
698  return false;
699 
700  if (Src0->isImm() && !Src1->isImm()) {
701  std::swap(Src0, Src1);
702  std::swap(Src0Idx, Src1Idx);
703  }
704 
705  int32_t Src1Val = static_cast<int32_t>(Src1->getImm());
706  if (Opc == AMDGPU::V_OR_B32_e64 ||
707  Opc == AMDGPU::V_OR_B32_e32 ||
708  Opc == AMDGPU::S_OR_B32) {
709  if (Src1Val == 0) {
710  // y = or x, 0 => y = copy x
711  MI->RemoveOperand(Src1Idx);
712  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
713  } else if (Src1Val == -1) {
714  // y = or x, -1 => y = v_mov_b32 -1
715  MI->RemoveOperand(Src1Idx);
716  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_OR_B32)));
717  } else
718  return false;
719 
720  return true;
721  }
722 
723  if (MI->getOpcode() == AMDGPU::V_AND_B32_e64 ||
724  MI->getOpcode() == AMDGPU::V_AND_B32_e32 ||
725  MI->getOpcode() == AMDGPU::S_AND_B32) {
726  if (Src1Val == 0) {
727  // y = and x, 0 => y = v_mov_b32 0
728  MI->RemoveOperand(Src0Idx);
729  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_AND_B32)));
730  } else if (Src1Val == -1) {
731  // y = and x, -1 => y = copy x
732  MI->RemoveOperand(Src1Idx);
733  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
734  stripExtraCopyOperands(*MI);
735  } else
736  return false;
737 
738  return true;
739  }
740 
741  if (MI->getOpcode() == AMDGPU::V_XOR_B32_e64 ||
742  MI->getOpcode() == AMDGPU::V_XOR_B32_e32 ||
743  MI->getOpcode() == AMDGPU::S_XOR_B32) {
744  if (Src1Val == 0) {
745  // y = xor x, 0 => y = copy x
746  MI->RemoveOperand(Src1Idx);
747  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
748  return true;
749  }
750  }
751 
752  return false;
753 }
754 
755 // Try to fold an instruction into a simpler one
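// e.g. (illustrative) v_cndmask_b32 %d, %x, %x, %cc selects the same value on
// both paths, so it is rewritten below as a plain copy/move of %x.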
756 static bool tryFoldInst(const SIInstrInfo *TII,
757  MachineInstr *MI) {
758  unsigned Opc = MI->getOpcode();
759 
760  if (Opc == AMDGPU::V_CNDMASK_B32_e32 ||
761  Opc == AMDGPU::V_CNDMASK_B32_e64 ||
762  Opc == AMDGPU::V_CNDMASK_B64_PSEUDO) {
763  const MachineOperand *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
764  const MachineOperand *Src1 = TII->getNamedOperand(*MI, AMDGPU::OpName::src1);
765  if (Src1->isIdenticalTo(*Src0)) {
766  LLVM_DEBUG(dbgs() << "Folded " << *MI << " into ");
767  int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
768  if (Src2Idx != -1)
769  MI->RemoveOperand(Src2Idx);
770  MI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1));
771  mutateCopyOp(*MI, TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY
772  : getMovOpc(false)));
773  LLVM_DEBUG(dbgs() << *MI << '\n');
774  return true;
775  }
776  }
777 
778  return false;
779 }
780 
781 void SIFoldOperands::foldInstOperand(MachineInstr &MI,
782  MachineOperand &OpToFold) const {
783  // We need to mutate the operands of new mov instructions to add implicit
784  // uses of EXEC, but adding them invalidates the use_iterator, so defer
785  // this.
786  SmallVector<MachineInstr *, 4> CopiesToReplace;
787  SmallVector<FoldCandidate, 4> FoldList;
788  MachineOperand &Dst = MI.getOperand(0);
789 
790  bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
791  if (FoldingImm) {
792  unsigned NumLiteralUses = 0;
793  MachineOperand *NonInlineUse = nullptr;
794  int NonInlineUseOpNo = -1;
795 
796  MachineRegisterInfo::use_iterator NextUse;
797  for (MachineRegisterInfo::use_iterator
798  Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
799  Use != E; Use = NextUse) {
800  NextUse = std::next(Use);
801  MachineInstr *UseMI = Use->getParent();
802  unsigned OpNo = Use.getOperandNo();
803 
804  // Folding the immediate may reveal operations that can be constant
805  // folded or replaced with a copy. This can happen for example after
806  // frame indices are lowered to constants or from splitting 64-bit
807  // constants.
808  //
809  // We may also encounter cases where one or both operands are
810  // immediates materialized into a register, which would ordinarily not
811  // be folded due to multiple uses or operand constraints.
812 
813  if (OpToFold.isImm() && tryConstantFoldOp(*MRI, TII, UseMI, &OpToFold)) {
814  LLVM_DEBUG(dbgs() << "Constant folded " << *UseMI << '\n');
815 
816  // Some constant folding cases change the same immediate's use to a new
817  // instruction, e.g. and x, 0 -> 0. Make sure we re-visit the user
818  // again. The same constant folded instruction could also have a second
819  // use operand.
820  NextUse = MRI->use_begin(Dst.getReg());
821  FoldList.clear();
822  continue;
823  }
824 
825  // Try to fold any inline immediate uses, and then only fold other
826  // constants if they have one use.
827  //
828  // The legality of the inline immediate must be checked based on the use
829  // operand, not the defining instruction, because 32-bit instructions
830  // with 32-bit inline immediate sources may be used to materialize
831  // constants used in 16-bit operands.
832  //
833  // e.g. it is unsafe to fold:
834  // s_mov_b32 s0, 1.0 // materializes 0x3f800000
835  // v_add_f16 v0, v1, s0 // 1.0 f16 inline immediate sees 0x00003c00
836 
837  // Folding immediates with more than one use will increase program size.
838  // FIXME: This will also reduce register usage, which may be better
839  // in some cases. A better heuristic is needed.
840  if (isInlineConstantIfFolded(TII, *UseMI, OpNo, OpToFold)) {
841  foldOperand(OpToFold, UseMI, OpNo, FoldList, CopiesToReplace);
842  } else {
843  if (++NumLiteralUses == 1) {
844  NonInlineUse = &*Use;
845  NonInlineUseOpNo = OpNo;
846  }
847  }
848  }
849 
850  if (NumLiteralUses == 1) {
851  MachineInstr *UseMI = NonInlineUse->getParent();
852  foldOperand(OpToFold, UseMI, NonInlineUseOpNo, FoldList, CopiesToReplace);
853  }
854  } else {
855  // Folding register.
856  SmallVector<MachineRegisterInfo::use_iterator, 4> UsesToProcess;
857  for (MachineRegisterInfo::use_iterator
858  Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
859  Use != E; ++Use) {
860  UsesToProcess.push_back(Use);
861  }
862  for (auto U : UsesToProcess) {
863  MachineInstr *UseMI = U->getParent();
864 
865  foldOperand(OpToFold, UseMI, U.getOperandNo(),
866  FoldList, CopiesToReplace);
867  }
868  }
869 
870  MachineFunction *MF = MI.getParent()->getParent();
871  // Make sure we add EXEC uses to any new v_mov instructions created.
872  for (MachineInstr *Copy : CopiesToReplace)
873  Copy->addImplicitDefUseOperands(*MF);
874 
875  for (FoldCandidate &Fold : FoldList) {
876  if (updateOperand(Fold, *TII, *TRI)) {
877  // Clear kill flags.
878  if (Fold.isReg()) {
879  assert(Fold.OpToFold && Fold.OpToFold->isReg());
880  // FIXME: Probably shouldn't bother trying to fold if not an
881  // SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR
882  // copies.
883  MRI->clearKillFlags(Fold.OpToFold->getReg());
884  }
885  LLVM_DEBUG(dbgs() << "Folded source from " << MI << " into OpNo "
886  << static_cast<int>(Fold.UseOpNo) << " of "
887  << *Fold.UseMI << '\n');
888  tryFoldInst(TII, Fold.UseMI);
889  } else if (Fold.isCommuted()) {
890  // Restoring instruction's original operand order if fold has failed.
891  TII->commuteInstruction(*Fold.UseMI, false);
892  }
893  }
894 }
895 
896 // Clamp patterns are canonically selected to v_max_* instructions, so only
897 // handle them.
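// Illustrative note: clamp(x) is selected as "%r = V_MAX_F32_e64 %x, %x, clamp",
// a max of identical, unmodified sources with the clamp bit set; that is the
// shape matched here so the clamp can be folded into the instruction defining %x.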
898 const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
899  unsigned Op = MI.getOpcode();
900  switch (Op) {
901  case AMDGPU::V_MAX_F32_e64:
902  case AMDGPU::V_MAX_F16_e64:
903  case AMDGPU::V_MAX_F64:
904  case AMDGPU::V_PK_MAX_F16: {
905  if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())
906  return nullptr;
907 
908  // Make sure sources are identical.
909  const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
910  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
911  if (!Src0->isReg() || !Src1->isReg() ||
912  Src0->getReg() != Src1->getReg() ||
913  Src0->getSubReg() != Src1->getSubReg() ||
914  Src0->getSubReg() != AMDGPU::NoSubRegister)
915  return nullptr;
916 
917  // Can't fold up if we have modifiers.
918  if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
919  return nullptr;
920 
921  unsigned Src0Mods
922  = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm();
923  unsigned Src1Mods
924  = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm();
925 
926  // Having a 0 op_sel_hi would require swizzling the output in the source
927  // instruction, which we can't do.
928  unsigned UnsetMods = (Op == AMDGPU::V_PK_MAX_F16) ? SISrcMods::OP_SEL_1 : 0;
929  if (Src0Mods != UnsetMods && Src1Mods != UnsetMods)
930  return nullptr;
931  return Src0;
932  }
933  default:
934  return nullptr;
935  }
936 }
937 
938 // We obviously have multiple uses in a clamp since the register is used twice
939 // in the same instruction.
940 static bool hasOneNonDBGUseInst(const MachineRegisterInfo &MRI, unsigned Reg) {
941  int Count = 0;
942  for (auto I = MRI.use_instr_nodbg_begin(Reg), E = MRI.use_instr_nodbg_end();
943  I != E; ++I) {
944  if (++Count > 1)
945  return false;
946  }
947 
948  return true;
949 }
950 
951 // FIXME: Clamp for v_mad_mixhi_f16 handled during isel.
952 bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) {
953  const MachineOperand *ClampSrc = isClamp(MI);
954  if (!ClampSrc || !hasOneNonDBGUseInst(*MRI, ClampSrc->getReg()))
955  return false;
956 
957  MachineInstr *Def = MRI->getVRegDef(ClampSrc->getReg());
958 
959  // The type of clamp must be compatible.
960  if (TII->getClampMask(*Def) != TII->getClampMask(MI))
961  return false;
962 
963  MachineOperand *DefClamp = TII->getNamedOperand(*Def, AMDGPU::OpName::clamp);
964  if (!DefClamp)
965  return false;
966 
967  LLVM_DEBUG(dbgs() << "Folding clamp " << *DefClamp << " into " << *Def
968  << '\n');
969 
970  // Clamp is applied after omod, so it is OK if omod is set.
971  DefClamp->setImm(1);
972  MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
973  MI.eraseFromParent();
974  return true;
975 }
976 
977 static int getOModValue(unsigned Opc, int64_t Val) {
978  switch (Opc) {
979  case AMDGPU::V_MUL_F32_e64: {
980  switch (static_cast<uint32_t>(Val)) {
981  case 0x3f000000: // 0.5
982  return SIOutMods::DIV2;
983  case 0x40000000: // 2.0
984  return SIOutMods::MUL2;
985  case 0x40800000: // 4.0
986  return SIOutMods::MUL4;
987  default:
988  return SIOutMods::NONE;
989  }
990  }
991  case AMDGPU::V_MUL_F16_e64: {
992  switch (static_cast<uint16_t>(Val)) {
993  case 0x3800: // 0.5
994  return SIOutMods::DIV2;
995  case 0x4000: // 2.0
996  return SIOutMods::MUL2;
997  case 0x4400: // 4.0
998  return SIOutMods::MUL4;
999  default:
1000  return SIOutMods::NONE;
1001  }
1002  }
1003  default:
1004  llvm_unreachable("invalid mul opcode");
1005  }
1006 }
1007 
1008 // FIXME: Does this really not support denormals with f16?
1009 // FIXME: Does this need to check IEEE mode bit? SNaNs are generally not
1010 // handled, so will anything other than that break?
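// Illustrative note: "%y = V_MUL_F32_e64 %x, 2.0" (or the canonicalized
// "fadd %x, %x") can be removed by setting omod = MUL2 on the instruction
// defining %x, provided that instruction has no omod or clamp already set.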
1011 std::pair<const MachineOperand *, int>
1012 SIFoldOperands::isOMod(const MachineInstr &MI) const {
1013  unsigned Op = MI.getOpcode();
1014  switch (Op) {
1015  case AMDGPU::V_MUL_F32_e64:
1016  case AMDGPU::V_MUL_F16_e64: {
1017  // If output denormals are enabled, omod is ignored.
1018  if ((Op == AMDGPU::V_MUL_F32_e64 && ST->hasFP32Denormals()) ||
1019  (Op == AMDGPU::V_MUL_F16_e64 && ST->hasFP16Denormals()))
1020  return std::make_pair(nullptr, SIOutMods::NONE);
1021 
1022  const MachineOperand *RegOp = nullptr;
1023  const MachineOperand *ImmOp = nullptr;
1024  const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1025  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
1026  if (Src0->isImm()) {
1027  ImmOp = Src0;
1028  RegOp = Src1;
1029  } else if (Src1->isImm()) {
1030  ImmOp = Src1;
1031  RegOp = Src0;
1032  } else
1033  return std::make_pair(nullptr, SIOutMods::NONE);
1034 
1035  int OMod = getOModValue(Op, ImmOp->getImm());
1036  if (OMod == SIOutMods::NONE ||
1037  TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
1038  TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
1039  TII->hasModifiersSet(MI, AMDGPU::OpName::omod) ||
1040  TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))
1041  return std::make_pair(nullptr, SIOutMods::NONE);
1042 
1043  return std::make_pair(RegOp, OMod);
1044  }
1045  case AMDGPU::V_ADD_F32_e64:
1046  case AMDGPU::V_ADD_F16_e64: {
1047  // If output denormals are enabled, omod is ignored.
1048  if ((Op == AMDGPU::V_ADD_F32_e64 && ST->hasFP32Denormals()) ||
1049  (Op == AMDGPU::V_ADD_F16_e64 && ST->hasFP16Denormals()))
1050  return std::make_pair(nullptr, SIOutMods::NONE);
1051 
1052  // Look through the DAGCombiner canonicalization fmul x, 2 -> fadd x, x
1053  const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1054  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
1055 
1056  if (Src0->isReg() && Src1->isReg() && Src0->getReg() == Src1->getReg() &&
1057  Src0->getSubReg() == Src1->getSubReg() &&
1058  !TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) &&
1059  !TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) &&
1060  !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) &&
1061  !TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
1062  return std::make_pair(Src0, SIOutMods::MUL2);
1063 
1064  return std::make_pair(nullptr, SIOutMods::NONE);
1065  }
1066  default:
1067  return std::make_pair(nullptr, SIOutMods::NONE);
1068  }
1069 }
1070 
1071 // FIXME: Does this need to check IEEE bit on function?
1072 bool SIFoldOperands::tryFoldOMod(MachineInstr &MI) {
1073  const MachineOperand *RegOp;
1074  int OMod;
1075  std::tie(RegOp, OMod) = isOMod(MI);
1076  if (OMod == SIOutMods::NONE || !RegOp->isReg() ||
1077  RegOp->getSubReg() != AMDGPU::NoSubRegister ||
1078  !hasOneNonDBGUseInst(*MRI, RegOp->getReg()))
1079  return false;
1080 
1081  MachineInstr *Def = MRI->getVRegDef(RegOp->getReg());
1082  MachineOperand *DefOMod = TII->getNamedOperand(*Def, AMDGPU::OpName::omod);
1083  if (!DefOMod || DefOMod->getImm() != SIOutMods::NONE)
1084  return false;
1085 
1086  // Clamp is applied after omod. If the source already has clamp set, don't
1087  // fold it.
1088  if (TII->hasModifiersSet(*Def, AMDGPU::OpName::clamp))
1089  return false;
1090 
1091  LLVM_DEBUG(dbgs() << "Folding omod " << MI << " into " << *Def << '\n');
1092 
1093  DefOMod->setImm(OMod);
1094  MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
1095  MI.eraseFromParent();
1096  return true;
1097 }
1098 
1099 bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
1100  if (skipFunction(MF.getFunction()))
1101  return false;
1102 
1103  MRI = &MF.getRegInfo();
1104  ST = &MF.getSubtarget<GCNSubtarget>();
1105  TII = ST->getInstrInfo();
1106  TRI = &TII->getRegisterInfo();
1107 
1108  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1109 
1110  // omod is ignored by hardware if IEEE bit is enabled. omod also does not
1111  // correctly handle signed zeros.
1112  //
1113  bool IsIEEEMode = ST->enableIEEEBit(MF);
1114  bool HasNSZ = MFI->hasNoSignedZerosFPMath();
1115 
1116  for (MachineBasicBlock *MBB : depth_first(&MF)) {
1117  MachineBasicBlock::iterator I, Next;
1118  for (I = MBB->begin(); I != MBB->end(); I = Next) {
1119  Next = std::next(I);
1120  MachineInstr &MI = *I;
1121 
1122  tryFoldInst(TII, &MI);
1123 
1124  if (!TII->isFoldableCopy(MI)) {
1125  // TODO: Omod might be OK if there is NSZ only on the source
1126  // instruction, and not the omod multiply.
1127  if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) ||
1128  !tryFoldOMod(MI))
1129  tryFoldClamp(MI);
1130  continue;
1131  }
1132 
1133  MachineOperand &OpToFold = MI.getOperand(1);
1134  bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
1135 
1136  // FIXME: We could also be folding things like TargetIndexes.
1137  if (!FoldingImm && !OpToFold.isReg())
1138  continue;
1139 
1140  if (OpToFold.isReg() &&
1141  !TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()))
1142  continue;
1143 
1144  // Prevent folding operands backwards in the function. For example,
1145  // the COPY opcode must not be replaced by 1 in this example:
1146  //
1147  // %3 = COPY %vgpr0; VGPR_32:%3
1148  // ...
1149  // %vgpr0 = V_MOV_B32_e32 1, implicit %exec
1150  MachineOperand &Dst = MI.getOperand(0);
1151  if (Dst.isReg() &&
1152  !TargetRegisterInfo::isVirtualRegister(Dst.getReg()))
1153  continue;
1154 
1155  foldInstOperand(MI, OpToFold);
1156  }
1157  }
1158  return false;
1159 }