LLVM  9.0.0svn
SIFoldOperands.cpp
1 //===-- SIFoldOperands.cpp - Fold operands --- ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 /// \file
8 //===----------------------------------------------------------------------===//
9 //
10 
11 #include "AMDGPU.h"
12 #include "AMDGPUSubtarget.h"
13 #include "SIInstrInfo.h"
14 #include "SIMachineFunctionInfo.h"
15 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
16 #include "llvm/ADT/DepthFirstIterator.h"
17 #include "llvm/CodeGen/LiveIntervals.h"
18 #include "llvm/CodeGen/MachineFunctionPass.h"
19 #include "llvm/CodeGen/MachineInstrBuilder.h"
20 #include "llvm/CodeGen/MachineRegisterInfo.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/raw_ostream.h"
23 #include "llvm/Target/TargetMachine.h"
24 
25 #define DEBUG_TYPE "si-fold-operands"
26 using namespace llvm;
27 
28 namespace {
29 
30 struct FoldCandidate {
31  MachineInstr *UseMI;
32  union {
33  MachineOperand *OpToFold;
34  uint64_t ImmToFold;
35  int FrameIndexToFold;
36  };
37  int ShrinkOpcode;
38  unsigned char UseOpNo;
39  MachineOperand::MachineOperandType Kind;
40  bool Commuted;
41 
42  FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp,
43  bool Commuted_ = false,
44  int ShrinkOp = -1) :
45  UseMI(MI), OpToFold(nullptr), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
46  Kind(FoldOp->getType()),
47  Commuted(Commuted_) {
48  if (FoldOp->isImm()) {
49  ImmToFold = FoldOp->getImm();
50  } else if (FoldOp->isFI()) {
51  FrameIndexToFold = FoldOp->getIndex();
52  } else {
53  assert(FoldOp->isReg());
54  OpToFold = FoldOp;
55  }
56  }
57 
58  bool isFI() const {
59  return Kind == MachineOperand::MO_FrameIndex;
60  }
61 
62  bool isImm() const {
63  return Kind == MachineOperand::MO_Immediate;
64  }
65 
66  bool isReg() const {
67  return Kind == MachineOperand::MO_Register;
68  }
69 
70  bool isCommuted() const {
71  return Commuted;
72  }
73 
74  bool needsShrink() const {
75  return ShrinkOpcode != -1;
76  }
77 
78  int getShrinkOpcode() const {
79  return ShrinkOpcode;
80  }
81 };
82 
83 class SIFoldOperands : public MachineFunctionPass {
84 public:
85  static char ID;
86  MachineRegisterInfo *MRI;
87  const SIInstrInfo *TII;
88  const SIRegisterInfo *TRI;
89  const GCNSubtarget *ST;
90 
91  void foldOperand(MachineOperand &OpToFold,
92  MachineInstr *UseMI,
93  unsigned UseOpIdx,
94  SmallVectorImpl<FoldCandidate> &FoldList,
95  SmallVectorImpl<MachineInstr *> &CopiesToReplace) const;
96 
97  void foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const;
98 
99  const MachineOperand *isClamp(const MachineInstr &MI) const;
100  bool tryFoldClamp(MachineInstr &MI);
101 
102  std::pair<const MachineOperand *, int> isOMod(const MachineInstr &MI) const;
103  bool tryFoldOMod(MachineInstr &MI);
104 
105 public:
106  SIFoldOperands() : MachineFunctionPass(ID) {
107  initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
108  }
109 
110  bool runOnMachineFunction(MachineFunction &MF) override;
111 
112  StringRef getPassName() const override { return "SI Fold Operands"; }
113 
114  void getAnalysisUsage(AnalysisUsage &AU) const override {
115  AU.setPreservesCFG();
116  MachineFunctionPass::getAnalysisUsage(AU);
117  }
118 };
119 
120 } // End anonymous namespace.
121 
122 INITIALIZE_PASS(SIFoldOperands, DEBUG_TYPE,
123  "SI Fold Operands", false, false)
124 
125 char SIFoldOperands::ID = 0;
126 
127 char &llvm::SIFoldOperandsID = SIFoldOperands::ID;
128 
129 // Wrapper around isInlineConstant that understands special cases when
130 // instruction types are replaced during operand folding.
131 static bool isInlineConstantIfFolded(const SIInstrInfo *TII,
132  const MachineInstr &UseMI,
133  unsigned OpNo,
134  const MachineOperand &OpToFold) {
135  if (TII->isInlineConstant(UseMI, OpNo, OpToFold))
136  return true;
137 
138  unsigned Opc = UseMI.getOpcode();
139  switch (Opc) {
140  case AMDGPU::V_MAC_F32_e64:
141  case AMDGPU::V_MAC_F16_e64:
142  case AMDGPU::V_FMAC_F32_e64: {
143  // Special case for mac. Since this is replaced with mad when folded into
144  // src2, we need to check the legality for the final instruction.
145  int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
146  if (static_cast<int>(OpNo) == Src2Idx) {
147  bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64;
148  bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64;
149 
150  unsigned Opc = IsFMA ?
151  AMDGPU::V_FMA_F32 : (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
152  const MCInstrDesc &MadDesc = TII->get(Opc);
153  return TII->isInlineConstant(OpToFold, MadDesc.OpInfo[OpNo].OperandType);
154  }
155  return false;
156  }
157  default:
158  return false;
159  }
160 }
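// Illustrative example (hypothetical MIR, not from the original source):
// when an immediate is folded into src2 of V_MAC_F32_e64, the mac is later
// rewritten to V_MAD_F32, so legality must be checked against the mad's
// operand description rather than the mac's:
//
//   %imm = V_MOV_B32_e32 1073741824, implicit $exec   ; 2.0f
//   %d   = V_MAC_F32_e64 %a, %b, %imm
//   ; -> %d = V_MAD_F32 %a, %b, 2.0  (2.0 is an inline constant for src2)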
161 
162 FunctionPass *llvm::createSIFoldOperandsPass() {
163  return new SIFoldOperands();
164 }
165 
166 static bool updateOperand(FoldCandidate &Fold,
167  const SIInstrInfo &TII,
168  const TargetRegisterInfo &TRI,
169  const GCNSubtarget &ST) {
170  MachineInstr *MI = Fold.UseMI;
171  MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
172  assert(Old.isReg());
173 
174  if (Fold.isImm()) {
175  if (MI->getDesc().TSFlags & SIInstrFlags::IsPacked &&
176  AMDGPU::isInlinableLiteralV216(static_cast<uint16_t>(Fold.ImmToFold),
177  ST.hasInv2PiInlineImm())) {
178  // Set op_sel/op_sel_hi on this operand or bail out if op_sel is
179  // already set.
180  unsigned Opcode = MI->getOpcode();
181  int OpNo = MI->getOperandNo(&Old);
182  int ModIdx = -1;
183  if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0))
184  ModIdx = AMDGPU::OpName::src0_modifiers;
185  else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1))
186  ModIdx = AMDGPU::OpName::src1_modifiers;
187  else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2))
188  ModIdx = AMDGPU::OpName::src2_modifiers;
189  assert(ModIdx != -1);
190  ModIdx = AMDGPU::getNamedOperandIdx(Opcode, ModIdx);
191  MachineOperand &Mod = MI->getOperand(ModIdx);
192  unsigned Val = Mod.getImm();
193  if ((Val & SISrcMods::OP_SEL_0) || !(Val & SISrcMods::OP_SEL_1))
194  return false;
195  // Only apply the following transformation if that operand requires
196  // a packed immediate.
197  switch (TII.get(Opcode).OpInfo[OpNo].OperandType) {
198  case AMDGPU::OPERAND_REG_IMM_V2FP16:
199  case AMDGPU::OPERAND_REG_IMM_V2INT16:
200  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
201  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
202  // If upper part is all zero we do not need op_sel_hi.
203  if (!isUInt<16>(Fold.ImmToFold)) {
204  if (!(Fold.ImmToFold & 0xffff)) {
205  Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0);
206  Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
207  Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff);
208  return true;
209  }
210  Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
211  Old.ChangeToImmediate(Fold.ImmToFold & 0xffff);
212  return true;
213  }
214  break;
215  default:
216  break;
217  }
218  }
219  }
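// Illustrative example (assumed encodings, not part of the original file):
// for a packed v2f16 literal 0x38003800 (both halves 0.5), the low half
// 0x3800 is folded and OP_SEL_1 is cleared so both lanes read it; for
// 0x38000000 (only the high half meaningful), OP_SEL_0 is set instead and
// the high 16 bits are shifted down before being folded.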
220 
221  if ((Fold.isImm() || Fold.isFI()) && Fold.needsShrink()) {
222  MachineBasicBlock *MBB = MI->getParent();
223  auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI);
224  if (Liveness != MachineBasicBlock::LQR_Dead)
225  return false;
226 
227  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
228  int Op32 = Fold.getShrinkOpcode();
229  MachineOperand &Dst0 = MI->getOperand(0);
230  MachineOperand &Dst1 = MI->getOperand(1);
231  assert(Dst0.isDef() && Dst1.isDef());
232 
233  bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg());
234 
235  const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
236  unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
237 
238  MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
239 
240  if (HaveNonDbgCarryUse) {
241  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg())
242  .addReg(AMDGPU::VCC, RegState::Kill);
243  }
244 
245  // Keep the old instruction around to avoid breaking iterators, but
246  // replace it with a dummy instruction to remove uses.
247  //
248  // FIXME: We should not invert how this pass looks at operands to avoid
249  // this. Should track set of foldable movs instead of looking for uses
250  // when looking at a use.
251  Dst0.setReg(NewReg0);
252  for (unsigned I = MI->getNumOperands() - 1; I > 0; --I)
253  MI->RemoveOperand(I);
254  MI->setDesc(TII.get(AMDGPU::IMPLICIT_DEF));
255 
256  if (Fold.isCommuted())
257  TII.commuteInstruction(*Inst32, false);
258  return true;
259  }
260 
261  assert(!Fold.needsShrink() && "not handled");
262 
263  if (Fold.isImm()) {
264  Old.ChangeToImmediate(Fold.ImmToFold);
265  return true;
266  }
267 
268  if (Fold.isFI()) {
269  Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
270  return true;
271  }
272 
273  MachineOperand *New = Fold.OpToFold;
274  if (TargetRegisterInfo::isVirtualRegister(New->getReg()) &&
275  TargetRegisterInfo::isVirtualRegister(Old.getReg())) {
276  Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);
277 
278  Old.setIsUndef(New->isUndef());
279  return true;
280  }
281 
282  // FIXME: Handle physical registers.
283 
284  return false;
285 }
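// Illustrative sketch (hypothetical MIR, not from the original source) of
// the shrink path above: folding an immediate into a VOP3 add requires the
// 32-bit encoding, whose carry-out implicitly writes VCC, so VCC must be
// dead at that point:
//
//   %d, %c = V_ADD_I32_e64 %a, %b, 0
//   ; with the immediate folded and VCC dead this becomes, roughly,
//   %d = V_ADD_I32_e32 64, %b, implicit-def $vcc, implicit $exec
//   ; a COPY from $vcc stands in for %c only if %c had non-debug uses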
286 
287 static bool isUseMIInFoldList(ArrayRef<FoldCandidate> FoldList,
288  const MachineInstr *MI) {
289  for (auto Candidate : FoldList) {
290  if (Candidate.UseMI == MI)
291  return true;
292  }
293  return false;
294 }
295 
296 static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
297  MachineInstr *MI, unsigned OpNo,
298  MachineOperand *OpToFold,
299  const SIInstrInfo *TII) {
300  if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {
301 
302  // Special case for v_mac_{f16, f32}_e64 if we are trying to fold into src2
303  unsigned Opc = MI->getOpcode();
304  if ((Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
305  Opc == AMDGPU::V_FMAC_F32_e64) &&
306  (int)OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)) {
307  bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64;
308  bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64;
309  unsigned NewOpc = IsFMA ?
310  AMDGPU::V_FMA_F32 : (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
311 
312  // Check if changing this to a v_mad_{f16, f32} instruction will allow us
313  // to fold the operand.
314  MI->setDesc(TII->get(NewOpc));
315  bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold, TII);
316  if (FoldAsMAD) {
317  MI->untieRegOperand(OpNo);
318  return true;
319  }
320  MI->setDesc(TII->get(Opc));
321  }
322 
323  // Special case for s_setreg_b32
324  if (Opc == AMDGPU::S_SETREG_B32 && OpToFold->isImm()) {
325  MI->setDesc(TII->get(AMDGPU::S_SETREG_IMM32_B32));
326  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
327  return true;
328  }
329 
330  // If we are already folding into another operand of MI, then
331  // we can't commute the instruction, otherwise we risk making the
332  // other fold illegal.
333  if (isUseMIInFoldList(FoldList, MI))
334  return false;
335 
336  unsigned CommuteOpNo = OpNo;
337 
338  // Operand is not legal, so try to commute the instruction to
339  // see if this makes it possible to fold.
340  unsigned CommuteIdx0 = TargetInstrInfo::CommuteAnyOperandIndex;
341  unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
342  bool CanCommute = TII->findCommutedOpIndices(*MI, CommuteIdx0, CommuteIdx1);
343 
344  if (CanCommute) {
345  if (CommuteIdx0 == OpNo)
346  CommuteOpNo = CommuteIdx1;
347  else if (CommuteIdx1 == OpNo)
348  CommuteOpNo = CommuteIdx0;
349  }
350 
351 
352  // One of operands might be an Imm operand, and OpNo may refer to it after
353  // the call of commuteInstruction() below. Such situations are avoided
354  // here explicitly as OpNo must be a register operand to be a candidate
355  // for memory folding.
356  if (CanCommute && (!MI->getOperand(CommuteIdx0).isReg() ||
357  !MI->getOperand(CommuteIdx1).isReg()))
358  return false;
359 
360  if (!CanCommute ||
361  !TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1))
362  return false;
363 
364  if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) {
365  if ((Opc == AMDGPU::V_ADD_I32_e64 ||
366  Opc == AMDGPU::V_SUB_I32_e64 ||
367  Opc == AMDGPU::V_SUBREV_I32_e64) && // FIXME
368  (OpToFold->isImm() || OpToFold->isFI())) {
369  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
370 
371  // Verify the other operand is a VGPR, otherwise we would violate the
372  // constant bus restriction.
373  unsigned OtherIdx = CommuteOpNo == CommuteIdx0 ? CommuteIdx1 : CommuteIdx0;
374  MachineOperand &OtherOp = MI->getOperand(OtherIdx);
375  if (!OtherOp.isReg() ||
376  !TII->getRegisterInfo().isVGPR(MRI, OtherOp.getReg()))
377  return false;
378 
379  assert(MI->getOperand(1).isDef());
380 
381  // Make sure to get the 32-bit version of the commuted opcode.
382  unsigned MaybeCommutedOpc = MI->getOpcode();
383  int Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc);
384 
385  FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true,
386  Op32));
387  return true;
388  }
389 
390  TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1);
391  return false;
392  }
393 
394  FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true));
395  return true;
396  }
397 
398  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
399  return true;
400 }
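// Illustrative example (hypothetical, not in the original): in VOP2
// encodings only src0 may hold an immediate, so a fold aimed at src1 of a
// commutable op can still succeed after commuting the operands; the
// candidate is recorded with Commuted = true so foldInstOperand() can undo
// the commute if updateOperand() later rejects the fold.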
401 
402 // If the use operand doesn't care about the value, this may be an operand only
403 // used for register indexing, in which case it is unsafe to fold.
404 static bool isUseSafeToFold(const SIInstrInfo *TII,
405  const MachineInstr &MI,
406  const MachineOperand &UseMO) {
407  return !UseMO.isUndef() && !TII->isSDWA(MI);
408  //return !MI.hasRegisterImplicitUseOperand(UseMO.getReg());
409 }
410 
411 void SIFoldOperands::foldOperand(
412  MachineOperand &OpToFold,
413  MachineInstr *UseMI,
414  unsigned UseOpIdx,
415  SmallVectorImpl<FoldCandidate> &FoldList,
416  SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
417  const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
418 
419  if (!isUseSafeToFold(TII, *UseMI, UseOp))
420  return;
421 
422  // FIXME: Fold operands with subregs.
423  if (UseOp.isReg() && OpToFold.isReg()) {
424  if (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister)
425  return;
426 
427  // Don't fold subregister extracts into tied operands; only fold a full
428  // copy, since a subregister use tied to a full register def doesn't
429  // really make sense. e.g. don't fold:
430  //
431  // %1 = COPY %0:sub1
432  // %2<tied3> = V_MAC_{F16, F32} %3, %4, %1<tied0>
433  //
434  // into
435  // %2<tied3> = V_MAC_{F16, F32} %3, %4, %0:sub1<tied0>
436  if (UseOp.isTied() && OpToFold.getSubReg() != AMDGPU::NoSubRegister)
437  return;
438  }
439 
440  // Special case for REG_SEQUENCE: We can't fold literals into
441  // REG_SEQUENCE instructions, so we have to fold them into the
442  // uses of REG_SEQUENCE.
443  if (UseMI->isRegSequence()) {
444  unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
445  unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
446 
447  for (MachineRegisterInfo::use_iterator
448  RSUse = MRI->use_begin(RegSeqDstReg), RSE = MRI->use_end();
449  RSUse != RSE; ++RSUse) {
450 
451  MachineInstr *RSUseMI = RSUse->getParent();
452  if (RSUse->getSubReg() != RegSeqDstSubReg)
453  continue;
454 
455  foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
456  CopiesToReplace);
457  }
458 
459  return;
460  }
461 
462 
463  bool FoldingImm = OpToFold.isImm();
464 
465  if (FoldingImm && UseMI->isCopy()) {
466  unsigned DestReg = UseMI->getOperand(0).getReg();
467  const TargetRegisterClass *DestRC
468  = TargetRegisterInfo::isVirtualRegister(DestReg) ?
469  MRI->getRegClass(DestReg) :
470  TRI->getPhysRegClass(DestReg);
471 
472  unsigned SrcReg = UseMI->getOperand(1).getReg();
473  if (TargetRegisterInfo::isVirtualRegister(DestReg) &&
474  TargetRegisterInfo::isVirtualRegister(SrcReg)) {
475  const TargetRegisterClass * SrcRC = MRI->getRegClass(SrcReg);
476  if (TRI->isSGPRClass(SrcRC) && TRI->hasVGPRs(DestRC)) {
477  MachineRegisterInfo::use_iterator NextUse;
478  SmallVector<FoldCandidate, 4> CopyUses;
479  for (MachineRegisterInfo::use_iterator
480  Use = MRI->use_begin(DestReg), E = MRI->use_end();
481  Use != E; Use = NextUse) {
482  NextUse = std::next(Use);
483  FoldCandidate FC = FoldCandidate(Use->getParent(),
484  Use.getOperandNo(), &UseMI->getOperand(1));
485  CopyUses.push_back(FC);
486  }
487  for (auto & F : CopyUses) {
488  foldOperand(*F.OpToFold, F.UseMI, F.UseOpNo,
489  FoldList, CopiesToReplace);
490  }
491  }
492  }
493 
494  // In order to fold immediates into copies, we need to change the
495  // copy to a MOV.
496 
497  unsigned MovOp = TII->getMovOpcode(DestRC);
498  if (MovOp == AMDGPU::COPY)
499  return;
500 
501  UseMI->setDesc(TII->get(MovOp));
502  CopiesToReplace.push_back(UseMI);
503  } else {
504  if (UseMI->isCopy() && OpToFold.isReg() &&
505  TargetRegisterInfo::isVirtualRegister(UseMI->getOperand(0).getReg()) &&
506  TargetRegisterInfo::isVirtualRegister(UseMI->getOperand(1).getReg()) &&
507  TRI->isVGPR(*MRI, UseMI->getOperand(0).getReg()) &&
508  TRI->isVGPR(*MRI, UseMI->getOperand(1).getReg()) &&
509  !UseMI->getOperand(1).getSubReg()) {
510  UseMI->getOperand(1).setReg(OpToFold.getReg());
511  UseMI->getOperand(1).setSubReg(OpToFold.getSubReg());
512  UseMI->getOperand(1).setIsKill(false);
513  CopiesToReplace.push_back(UseMI);
514  OpToFold.setIsKill(false);
515  return;
516  }
517 
518  const MCInstrDesc &UseDesc = UseMI->getDesc();
519 
520  // Don't fold into target independent nodes. Target independent opcodes
521  // don't have defined register classes.
522  if (UseDesc.isVariadic() ||
523  UseOp.isImplicit() ||
524  UseDesc.OpInfo[UseOpIdx].RegClass == -1)
525  return;
526  }
527 
528  if (!FoldingImm) {
529  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
530 
531  // FIXME: We could try to change the instruction from 64-bit to 32-bit
532  // to enable more folding opportunities. The shrink operands pass
533  // already does this.
534  return;
535  }
536 
537 
538  const MCInstrDesc &FoldDesc = OpToFold.getParent()->getDesc();
539  const TargetRegisterClass *FoldRC =
540  TRI->getRegClass(FoldDesc.OpInfo[0].RegClass);
541 
542 
543  // Split 64-bit constants into 32-bits for folding.
544  if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) {
545  unsigned UseReg = UseOp.getReg();
546  const TargetRegisterClass *UseRC
547  = TargetRegisterInfo::isVirtualRegister(UseReg) ?
548  MRI->getRegClass(UseReg) :
549  TRI->getPhysRegClass(UseReg);
550 
551  if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64)
552  return;
553 
554  APInt Imm(64, OpToFold.getImm());
555  if (UseOp.getSubReg() == AMDGPU::sub0) {
556  Imm = Imm.getLoBits(32);
557  } else {
558  assert(UseOp.getSubReg() == AMDGPU::sub1);
559  Imm = Imm.getHiBits(32);
560  }
561 
562  MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
563  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
564  return;
565  }
566 
567 
568 
569  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
570 }
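// Illustrative example (hypothetical MIR, not from the original source) of
// the 64-bit split above: a subregister use of a 64-bit immediate def
// folds only the matching 32-bit half:
//
//   %k:sreg_64 = S_MOV_B64 4607182418800017408  ; 0x3ff0000000000000 (1.0)
//   %lo = COPY %k.sub0    ; candidate immediate 0x00000000
//   %hi = COPY %k.sub1    ; candidate immediate 0x3ff00000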
571 
572 static bool evalBinaryInstruction(unsigned Opcode, int32_t &Result,
573  uint32_t LHS, uint32_t RHS) {
574  switch (Opcode) {
575  case AMDGPU::V_AND_B32_e64:
576  case AMDGPU::V_AND_B32_e32:
577  case AMDGPU::S_AND_B32:
578  Result = LHS & RHS;
579  return true;
580  case AMDGPU::V_OR_B32_e64:
581  case AMDGPU::V_OR_B32_e32:
582  case AMDGPU::S_OR_B32:
583  Result = LHS | RHS;
584  return true;
585  case AMDGPU::V_XOR_B32_e64:
586  case AMDGPU::V_XOR_B32_e32:
587  case AMDGPU::S_XOR_B32:
588  Result = LHS ^ RHS;
589  return true;
590  case AMDGPU::V_LSHL_B32_e64:
591  case AMDGPU::V_LSHL_B32_e32:
592  case AMDGPU::S_LSHL_B32:
593  // The instruction ignores the high bits for out of bounds shifts.
594  Result = LHS << (RHS & 31);
595  return true;
596  case AMDGPU::V_LSHLREV_B32_e64:
597  case AMDGPU::V_LSHLREV_B32_e32:
598  Result = RHS << (LHS & 31);
599  return true;
600  case AMDGPU::V_LSHR_B32_e64:
601  case AMDGPU::V_LSHR_B32_e32:
602  case AMDGPU::S_LSHR_B32:
603  Result = LHS >> (RHS & 31);
604  return true;
605  case AMDGPU::V_LSHRREV_B32_e64:
606  case AMDGPU::V_LSHRREV_B32_e32:
607  Result = RHS >> (LHS & 31);
608  return true;
609  case AMDGPU::V_ASHR_I32_e64:
610  case AMDGPU::V_ASHR_I32_e32:
611  case AMDGPU::S_ASHR_I32:
612  Result = static_cast<int32_t>(LHS) >> (RHS & 31);
613  return true;
614  case AMDGPU::V_ASHRREV_I32_e64:
615  case AMDGPU::V_ASHRREV_I32_e32:
616  Result = static_cast<int32_t>(RHS) >> (LHS & 31);
617  return true;
618  default:
619  return false;
620  }
621 }
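// Worked example (editorial, not from the original source): with both
// sources materialized immediates, e.g. LHS = 0xff00 and RHS = 0x0ff0, an
// AND opcode yields Result = 0x0f00, an OR yields 0xfff0 and an XOR yields
// 0xf0f0; shift amounts are masked to 5 bits, so a shift by 33 behaves
// like a shift by 1.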
622 
623 static unsigned getMovOpc(bool IsScalar) {
624  return IsScalar ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
625 }
626 
627 /// Remove any leftover implicit operands from mutating the instruction. e.g.
628 /// if we replace an s_and_b32 with a copy, we don't need the implicit scc def
629 /// anymore.
630 static void stripExtraCopyOperands(MachineInstr &MI) {
631  const MCInstrDesc &Desc = MI.getDesc();
632  unsigned NumOps = Desc.getNumOperands() +
633  Desc.getNumImplicitUses() +
634  Desc.getNumImplicitDefs();
635 
636  for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
637  MI.RemoveOperand(I);
638 }
639 
640 static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) {
641  MI.setDesc(NewDesc);
642  stripExtraCopyOperands(MI);
643 }
644 
645 static MachineOperand *getImmOrMaterializedImm(MachineRegisterInfo &MRI,
646  MachineOperand &Op) {
647  if (Op.isReg()) {
648  // If this has a subregister, it obviously is a register source.
649  if (Op.getSubReg() != AMDGPU::NoSubRegister ||
650  !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
651  return &Op;
652 
653  MachineInstr *Def = MRI.getVRegDef(Op.getReg());
654  if (Def && Def->isMoveImmediate()) {
655  MachineOperand &ImmSrc = Def->getOperand(1);
656  if (ImmSrc.isImm())
657  return &ImmSrc;
658  }
659  }
660 
661  return &Op;
662 }
663 
664 // Try to simplify operations with a constant that may appear after instruction
665 // selection.
666 // TODO: See if a frame index with a fixed offset can fold.
667 static bool tryConstantFoldOp(MachineRegisterInfo &MRI,
668  const SIInstrInfo *TII,
669  MachineInstr *MI,
670  MachineOperand *ImmOp) {
671  unsigned Opc = MI->getOpcode();
672  if (Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
673  Opc == AMDGPU::S_NOT_B32) {
674  MI->getOperand(1).ChangeToImmediate(~ImmOp->getImm());
675  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_NOT_B32)));
676  return true;
677  }
678 
679  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
680  if (Src1Idx == -1)
681  return false;
682 
683  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
684  MachineOperand *Src0 = getImmOrMaterializedImm(MRI, MI->getOperand(Src0Idx));
685  MachineOperand *Src1 = getImmOrMaterializedImm(MRI, MI->getOperand(Src1Idx));
686 
687  if (!Src0->isImm() && !Src1->isImm())
688  return false;
689 
690  if (MI->getOpcode() == AMDGPU::V_LSHL_OR_B32) {
691  if (Src0->isImm() && Src0->getImm() == 0) {
692  // v_lshl_or_b32 0, X, Y -> copy Y
693  // v_lshl_or_b32 0, X, K -> v_mov_b32 K
694  bool UseCopy = TII->getNamedOperand(*MI, AMDGPU::OpName::src2)->isReg();
695  MI->RemoveOperand(Src1Idx);
696  MI->RemoveOperand(Src0Idx);
697 
698  MI->setDesc(TII->get(UseCopy ? AMDGPU::COPY : AMDGPU::V_MOV_B32_e32));
699  return true;
700  }
701  }
702 
703  // and k0, k1 -> v_mov_b32 (k0 & k1)
704  // or k0, k1 -> v_mov_b32 (k0 | k1)
705  // xor k0, k1 -> v_mov_b32 (k0 ^ k1)
706  if (Src0->isImm() && Src1->isImm()) {
707  int32_t NewImm;
708  if (!evalBinaryInstruction(Opc, NewImm, Src0->getImm(), Src1->getImm()))
709  return false;
710 
711  const SIRegisterInfo &TRI = TII->getRegisterInfo();
712  bool IsSGPR = TRI.isSGPRReg(MRI, MI->getOperand(0).getReg());
713 
714  // Be careful to change the right operand, src0 may belong to a different
715  // instruction.
716  MI->getOperand(Src0Idx).ChangeToImmediate(NewImm);
717  MI->RemoveOperand(Src1Idx);
718  mutateCopyOp(*MI, TII->get(getMovOpc(IsSGPR)));
719  return true;
720  }
721 
722  if (!MI->isCommutable())
723  return false;
724 
725  if (Src0->isImm() && !Src1->isImm()) {
726  std::swap(Src0, Src1);
727  std::swap(Src0Idx, Src1Idx);
728  }
729 
730  int32_t Src1Val = static_cast<int32_t>(Src1->getImm());
731  if (Opc == AMDGPU::V_OR_B32_e64 ||
732  Opc == AMDGPU::V_OR_B32_e32 ||
733  Opc == AMDGPU::S_OR_B32) {
734  if (Src1Val == 0) {
735  // y = or x, 0 => y = copy x
736  MI->RemoveOperand(Src1Idx);
737  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
738  } else if (Src1Val == -1) {
739  // y = or x, -1 => y = v_mov_b32 -1
740  MI->RemoveOperand(Src1Idx);
741  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_OR_B32)));
742  } else
743  return false;
744 
745  return true;
746  }
747 
748  if (MI->getOpcode() == AMDGPU::V_AND_B32_e64 ||
749  MI->getOpcode() == AMDGPU::V_AND_B32_e32 ||
750  MI->getOpcode() == AMDGPU::S_AND_B32) {
751  if (Src1Val == 0) {
752  // y = and x, 0 => y = v_mov_b32 0
753  MI->RemoveOperand(Src0Idx);
754  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_AND_B32)));
755  } else if (Src1Val == -1) {
756  // y = and x, -1 => y = copy x
757  MI->RemoveOperand(Src1Idx);
758  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
759  stripExtraCopyOperands(*MI);
760  } else
761  return false;
762 
763  return true;
764  }
765 
766  if (MI->getOpcode() == AMDGPU::V_XOR_B32_e64 ||
767  MI->getOpcode() == AMDGPU::V_XOR_B32_e32 ||
768  MI->getOpcode() == AMDGPU::S_XOR_B32) {
769  if (Src1Val == 0) {
770  // y = xor x, 0 => y = copy x
771  MI->RemoveOperand(Src1Idx);
772  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
773  return true;
774  }
775  }
776 
777  return false;
778 }
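// Illustrative examples (hypothetical MIR, not in the original) of the
// identity cases above, which turn instructions into copies or moves:
//
//   %c = V_OR_B32_e32 %a, 0     ->  %c = COPY %a
//   %c = V_OR_B32_e32 %a, -1    ->  %c = V_MOV_B32_e32 -1
//   %c = V_AND_B32_e32 %a, 0    ->  %c = V_MOV_B32_e32 0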
779 
780 // Try to fold an instruction into a simpler one
781 static bool tryFoldInst(const SIInstrInfo *TII,
782  MachineInstr *MI) {
783  unsigned Opc = MI->getOpcode();
784 
785  if (Opc == AMDGPU::V_CNDMASK_B32_e32 ||
786  Opc == AMDGPU::V_CNDMASK_B32_e64 ||
787  Opc == AMDGPU::V_CNDMASK_B64_PSEUDO) {
788  const MachineOperand *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
789  const MachineOperand *Src1 = TII->getNamedOperand(*MI, AMDGPU::OpName::src1);
790  int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
791  int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
792  if (Src1->isIdenticalTo(*Src0) &&
793  (Src1ModIdx == -1 || !MI->getOperand(Src1ModIdx).getImm()) &&
794  (Src0ModIdx == -1 || !MI->getOperand(Src0ModIdx).getImm())) {
795  LLVM_DEBUG(dbgs() << "Folded " << *MI << " into ");
796  auto &NewDesc =
797  TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY : getMovOpc(false));
798  int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
799  if (Src2Idx != -1)
800  MI->RemoveOperand(Src2Idx);
801  MI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1));
802  if (Src1ModIdx != -1)
803  MI->RemoveOperand(Src1ModIdx);
804  if (Src0ModIdx != -1)
805  MI->RemoveOperand(Src0ModIdx);
806  mutateCopyOp(*MI, NewDesc);
807  LLVM_DEBUG(dbgs() << *MI << '\n');
808  return true;
809  }
810  }
811 
812  return false;
813 }
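// Illustrative example (hypothetical MIR, not from the original source): a
// select whose two value operands are identical is independent of the
// condition, so
//
//   %d = V_CNDMASK_B32_e64 0, %a, 0, %a, %cc
//
// collapses to a plain COPY of %a (or a V_MOV_B32_e32 if the common source
// is an immediate), provided no source modifiers are set.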
814 
815 void SIFoldOperands::foldInstOperand(MachineInstr &MI,
816  MachineOperand &OpToFold) const {
817  // We need to mutate the operands of new mov instructions to add implicit
818  // uses of EXEC, but adding them invalidates the use_iterator, so defer
819  // this.
820  SmallVector<MachineInstr *, 4> CopiesToReplace;
821  SmallVector<FoldCandidate, 4> FoldList;
822  MachineOperand &Dst = MI.getOperand(0);
823 
824  bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
825  if (FoldingImm) {
826  unsigned NumLiteralUses = 0;
827  MachineOperand *NonInlineUse = nullptr;
828  int NonInlineUseOpNo = -1;
829 
830  MachineRegisterInfo::use_iterator NextUse;
831  for (MachineRegisterInfo::use_iterator
832  Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
833  Use != E; Use = NextUse) {
834  NextUse = std::next(Use);
835  MachineInstr *UseMI = Use->getParent();
836  unsigned OpNo = Use.getOperandNo();
837 
838  // Folding the immediate may reveal operations that can be constant
839  // folded or replaced with a copy. This can happen for example after
840  // frame indices are lowered to constants or from splitting 64-bit
841  // constants.
842  //
843  // We may also encounter cases where one or both operands are
844  // immediates materialized into a register, which would ordinarily not
845  // be folded due to multiple uses or operand constraints.
846 
847  if (OpToFold.isImm() && tryConstantFoldOp(*MRI, TII, UseMI, &OpToFold)) {
848  LLVM_DEBUG(dbgs() << "Constant folded " << *UseMI << '\n');
849 
850  // Some constant folding cases change the same immediate's use to a new
851  // instruction, e.g. and x, 0 -> 0. Make sure we re-visit the user
852  // again. The same constant folded instruction could also have a second
853  // use operand.
854  NextUse = MRI->use_begin(Dst.getReg());
855  FoldList.clear();
856  continue;
857  }
858 
859  // Try to fold any inline immediate uses, and then only fold other
860  // constants if they have one use.
861  //
862  // The legality of the inline immediate must be checked based on the use
863  // operand, not the defining instruction, because 32-bit instructions
864  // with 32-bit inline immediate sources may be used to materialize
865  // constants used in 16-bit operands.
866  //
867  // e.g. it is unsafe to fold:
868  // s_mov_b32 s0, 1.0 // materializes 0x3f800000
869  // v_add_f16 v0, v1, s0 // 1.0 f16 inline immediate sees 0x00003c00
870 
871  // Folding immediates with more than one use will increase program size.
872  // FIXME: This will also reduce register usage, which may be better
873  // in some cases. A better heuristic is needed.
874  if (isInlineConstantIfFolded(TII, *UseMI, OpNo, OpToFold)) {
875  foldOperand(OpToFold, UseMI, OpNo, FoldList, CopiesToReplace);
876  } else {
877  if (++NumLiteralUses == 1) {
878  NonInlineUse = &*Use;
879  NonInlineUseOpNo = OpNo;
880  }
881  }
882  }
883 
884  if (NumLiteralUses == 1) {
885  MachineInstr *UseMI = NonInlineUse->getParent();
886  foldOperand(OpToFold, UseMI, NonInlineUseOpNo, FoldList, CopiesToReplace);
887  }
888  } else {
889  // Folding register.
890  SmallVector<MachineRegisterInfo::use_iterator, 4> UsesToProcess;
891  for (MachineRegisterInfo::use_iterator
892  Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
893  Use != E; ++Use) {
894  UsesToProcess.push_back(Use);
895  }
896  for (auto U : UsesToProcess) {
897  MachineInstr *UseMI = U->getParent();
898 
899  foldOperand(OpToFold, UseMI, U.getOperandNo(),
900  FoldList, CopiesToReplace);
901  }
902  }
903 
904  MachineFunction *MF = MI.getParent()->getParent();
905  // Make sure we add EXEC uses to any new v_mov instructions created.
906  for (MachineInstr *Copy : CopiesToReplace)
907  Copy->addImplicitDefUseOperands(*MF);
908 
909  for (FoldCandidate &Fold : FoldList) {
910  if (updateOperand(Fold, *TII, *TRI, *ST)) {
911  // Clear kill flags.
912  if (Fold.isReg()) {
913  assert(Fold.OpToFold && Fold.OpToFold->isReg());
914  // FIXME: Probably shouldn't bother trying to fold if not an
915  // SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR
916  // copies.
917  MRI->clearKillFlags(Fold.OpToFold->getReg());
918  }
919  LLVM_DEBUG(dbgs() << "Folded source from " << MI << " into OpNo "
920  << static_cast<int>(Fold.UseOpNo) << " of "
921  << *Fold.UseMI << '\n');
922  tryFoldInst(TII, Fold.UseMI);
923  } else if (Fold.isCommuted()) {
924  // Restoring instruction's original operand order if fold has failed.
925  TII->commuteInstruction(*Fold.UseMI, false);
926  }
927  }
928 }
929 
930 // Clamp patterns are canonically selected to v_max_* instructions, so only
931 // handle them.
932 const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
933  unsigned Op = MI.getOpcode();
934  switch (Op) {
935  case AMDGPU::V_MAX_F32_e64:
936  case AMDGPU::V_MAX_F16_e64:
937  case AMDGPU::V_MAX_F64:
938  case AMDGPU::V_PK_MAX_F16: {
939  if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())
940  return nullptr;
941 
942  // Make sure sources are identical.
943  const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
944  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
945  if (!Src0->isReg() || !Src1->isReg() ||
946  Src0->getReg() != Src1->getReg() ||
947  Src0->getSubReg() != Src1->getSubReg() ||
948  Src0->getSubReg() != AMDGPU::NoSubRegister)
949  return nullptr;
950 
951  // Can't fold up if we have modifiers.
952  if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
953  return nullptr;
954 
955  unsigned Src0Mods
956  = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm();
957  unsigned Src1Mods
958  = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm();
959 
960  // Having a 0 op_sel_hi would require swizzling the output in the source
961  // instruction, which we can't do.
962  unsigned UnsetMods = (Op == AMDGPU::V_PK_MAX_F16) ? SISrcMods::OP_SEL_1
963  : 0u;
964  if (Src0Mods != UnsetMods && Src1Mods != UnsetMods)
965  return nullptr;
966  return Src0;
967  }
968  default:
969  return nullptr;
970  }
971 }
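// Illustrative example (hypothetical MIR, not in the original): the
// canonical clamp pattern folded by tryFoldClamp() below is a max of a
// value with itself that only sets the clamp bit:
//
//   %v = V_ADD_F32_e64 0, %a, 0, %b, 0, 0
//   %c = V_MAX_F32_e64 0, %v, 0, %v, 1, 0   ; clamp = 1
//   ; -> clamp is set on the V_ADD_F32 def and the max is erased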
972 
973 // We obviously have multiple uses in a clamp since the register is used twice
974 // in the same instruction.
975 static bool hasOneNonDBGUseInst(const MachineRegisterInfo &MRI, unsigned Reg) {
976  int Count = 0;
977  for (auto I = MRI.use_instr_nodbg_begin(Reg), E = MRI.use_instr_nodbg_end();
978  I != E; ++I) {
979  if (++Count > 1)
980  return false;
981  }
982 
983  return true;
984 }
985 
986 // FIXME: Clamp for v_mad_mixhi_f16 handled during isel.
987 bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) {
988  const MachineOperand *ClampSrc = isClamp(MI);
989  if (!ClampSrc || !hasOneNonDBGUseInst(*MRI, ClampSrc->getReg()))
990  return false;
991 
992  MachineInstr *Def = MRI->getVRegDef(ClampSrc->getReg());
993 
994  // The type of clamp must be compatible.
995  if (TII->getClampMask(*Def) != TII->getClampMask(MI))
996  return false;
997 
998  MachineOperand *DefClamp = TII->getNamedOperand(*Def, AMDGPU::OpName::clamp);
999  if (!DefClamp)
1000  return false;
1001 
1002  LLVM_DEBUG(dbgs() << "Folding clamp " << *DefClamp << " into " << *Def
1003  << '\n');
1004 
1005  // Clamp is applied after omod, so it is OK if omod is set.
1006  DefClamp->setImm(1);
1007  MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
1008  MI.eraseFromParent();
1009  return true;
1010 }
1011 
1012 static int getOModValue(unsigned Opc, int64_t Val) {
1013  switch (Opc) {
1014  case AMDGPU::V_MUL_F32_e64: {
1015  switch (static_cast<uint32_t>(Val)) {
1016  case 0x3f000000: // 0.5
1017  return SIOutMods::DIV2;
1018  case 0x40000000: // 2.0
1019  return SIOutMods::MUL2;
1020  case 0x40800000: // 4.0
1021  return SIOutMods::MUL4;
1022  default:
1023  return SIOutMods::NONE;
1024  }
1025  }
1026  case AMDGPU::V_MUL_F16_e64: {
1027  switch (static_cast<uint16_t>(Val)) {
1028  case 0x3800: // 0.5
1029  return SIOutMods::DIV2;
1030  case 0x4000: // 2.0
1031  return SIOutMods::MUL2;
1032  case 0x4400: // 4.0
1033  return SIOutMods::MUL4;
1034  default:
1035  return SIOutMods::NONE;
1036  }
1037  }
1038  default:
1039  llvm_unreachable("invalid mul opcode");
1040  }
1041 }
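// Editorial note (standard IEEE encodings, not from the original source):
// the multipliers recognized above are exact power-of-two constants:
// 0x3f000000 is 0.5f, 0x40000000 is 2.0f and 0x40800000 is 4.0f, while
// 0x3800, 0x4000 and 0x4400 encode the same values in f16. For example,
// 0x40800000 has exponent 129 - 127 = 2 and a zero mantissa, i.e.
// 1.0 * 2^2 = 4.0, which maps onto the hardware omod field as mul:4.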
1042 
1043 // FIXME: Does this really not support denormals with f16?
1044 // FIXME: Does this need to check IEEE mode bit? SNaNs are generally not
1045 // handled, so will anything other than that break?
1046 std::pair<const MachineOperand *, int>
1047 SIFoldOperands::isOMod(const MachineInstr &MI) const {
1048  unsigned Op = MI.getOpcode();
1049  switch (Op) {
1050  case AMDGPU::V_MUL_F32_e64:
1051  case AMDGPU::V_MUL_F16_e64: {
1052  // If output denormals are enabled, omod is ignored.
1053  if ((Op == AMDGPU::V_MUL_F32_e64 && ST->hasFP32Denormals()) ||
1054  (Op == AMDGPU::V_MUL_F16_e64 && ST->hasFP16Denormals()))
1055  return std::make_pair(nullptr, SIOutMods::NONE);
1056 
1057  const MachineOperand *RegOp = nullptr;
1058  const MachineOperand *ImmOp = nullptr;
1059  const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1060  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
1061  if (Src0->isImm()) {
1062  ImmOp = Src0;
1063  RegOp = Src1;
1064  } else if (Src1->isImm()) {
1065  ImmOp = Src1;
1066  RegOp = Src0;
1067  } else
1068  return std::make_pair(nullptr, SIOutMods::NONE);
1069 
1070  int OMod = getOModValue(Op, ImmOp->getImm());
1071  if (OMod == SIOutMods::NONE ||
1072  TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
1073  TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
1074  TII->hasModifiersSet(MI, AMDGPU::OpName::omod) ||
1075  TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))
1076  return std::make_pair(nullptr, SIOutMods::NONE);
1077 
1078  return std::make_pair(RegOp, OMod);
1079  }
1080  case AMDGPU::V_ADD_F32_e64:
1081  case AMDGPU::V_ADD_F16_e64: {
1082  // If output denormals are enabled, omod is ignored.
1083  if ((Op == AMDGPU::V_ADD_F32_e64 && ST->hasFP32Denormals()) ||
1084  (Op == AMDGPU::V_ADD_F16_e64 && ST->hasFP16Denormals()))
1085  return std::make_pair(nullptr, SIOutMods::NONE);
1086 
1087  // Look through the DAGCombiner canonicalization fmul x, 2 -> fadd x, x
1088  const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1089  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
1090 
1091  if (Src0->isReg() && Src1->isReg() && Src0->getReg() == Src1->getReg() &&
1092  Src0->getSubReg() == Src1->getSubReg() &&
1093  !TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) &&
1094  !TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) &&
1095  !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) &&
1096  !TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
1097  return std::make_pair(Src0, SIOutMods::MUL2);
1098 
1099  return std::make_pair(nullptr, SIOutMods::NONE);
1100  }
1101  default:
1102  return std::make_pair(nullptr, SIOutMods::NONE);
1103  }
1104 }
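// Illustrative example (hypothetical MIR, not in the original): the fadd
// case above recognizes the DAGCombiner canonicalization of "x * 2.0":
//
//   %v = V_ADD_F32_e64 0, %x, 0, %x, 0, 0   ; x + x
//   ; with a single use and no clamp/omod set on %x's def, tryFoldOMod()
//   ; folds this as omod = mul:2 on the defining VOP3 instruction.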
1105 
1106 // FIXME: Does this need to check IEEE bit on function?
1107 bool SIFoldOperands::tryFoldOMod(MachineInstr &MI) {
1108  const MachineOperand *RegOp;
1109  int OMod;
1110  std::tie(RegOp, OMod) = isOMod(MI);
1111  if (OMod == SIOutMods::NONE || !RegOp->isReg() ||
1112  RegOp->getSubReg() != AMDGPU::NoSubRegister ||
1113  !hasOneNonDBGUseInst(*MRI, RegOp->getReg()))
1114  return false;
1115 
1116  MachineInstr *Def = MRI->getVRegDef(RegOp->getReg());
1117  MachineOperand *DefOMod = TII->getNamedOperand(*Def, AMDGPU::OpName::omod);
1118  if (!DefOMod || DefOMod->getImm() != SIOutMods::NONE)
1119  return false;
1120 
1121  // Clamp is applied after omod. If the source already has clamp set, don't
1122  // fold it.
1123  if (TII->hasModifiersSet(*Def, AMDGPU::OpName::clamp))
1124  return false;
1125 
1126  LLVM_DEBUG(dbgs() << "Folding omod " << MI << " into " << *Def << '\n');
1127 
1128  DefOMod->setImm(OMod);
1129  MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
1130  MI.eraseFromParent();
1131  return true;
1132 }
1133 
1134 bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
1135  if (skipFunction(MF.getFunction()))
1136  return false;
1137 
1138  MRI = &MF.getRegInfo();
1139  ST = &MF.getSubtarget<GCNSubtarget>();
1140  TII = ST->getInstrInfo();
1141  TRI = &TII->getRegisterInfo();
1142 
1143  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1144 
1145  // omod is ignored by hardware if IEEE bit is enabled. omod also does not
1146  // correctly handle signed zeros.
1147  //
1148  // FIXME: Also need to check strictfp
1149  bool IsIEEEMode = MFI->getMode().IEEE;
1150  bool HasNSZ = MFI->hasNoSignedZerosFPMath();
1151 
1152  for (MachineBasicBlock *MBB : depth_first(&MF)) {
1153  MachineBasicBlock::iterator I, Next;
1154  for (I = MBB->begin(); I != MBB->end(); I = Next) {
1155  Next = std::next(I);
1156  MachineInstr &MI = *I;
1157 
1158  tryFoldInst(TII, &MI);
1159 
1160  if (!TII->isFoldableCopy(MI)) {
1161  // TODO: Omod might be OK if there is NSZ only on the source
1162  // instruction, and not the omod multiply.
1163  if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) ||
1164  !tryFoldOMod(MI))
1165  tryFoldClamp(MI);
1166  continue;
1167  }
1168 
1169  MachineOperand &OpToFold = MI.getOperand(1);
1170  bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
1171 
1172  // FIXME: We could also be folding things like TargetIndexes.
1173  if (!FoldingImm && !OpToFold.isReg())
1174  continue;
1175 
1176  if (OpToFold.isReg() &&
1177  !TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()))
1178  continue;
1179 
1180  // Prevent folding operands backwards in the function. For example,
1181  // the COPY opcode must not be replaced by 1 in this example:
1182  //
1183  // %3 = COPY %vgpr0; VGPR_32:%3
1184  // ...
1185  // %vgpr0 = V_MOV_B32_e32 1, implicit %exec
1186  MachineOperand &Dst = MI.getOperand(0);
1187  if (Dst.isReg() &&
1188  !TargetRegisterInfo::isVirtualRegister(Dst.getReg()))
1189  continue;
1190 
1191  foldInstOperand(MI, OpToFold);
1192  }
1193  }
1194  return false;
1195 }