SIFoldOperands.cpp
1 //===-- SIFoldOperands.cpp - Fold operands --- ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 /// \file
8 //===----------------------------------------------------------------------===//
9 //
10 
11 #include "AMDGPU.h"
12 #include "AMDGPUSubtarget.h"
13 #include "SIInstrInfo.h"
14 #include "SIMachineFunctionInfo.h"
15 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
16 #include "llvm/ADT/DepthFirstIterator.h"
17 #include "llvm/CodeGen/LiveIntervals.h"
18 #include "llvm/CodeGen/MachineFunctionPass.h"
19 #include "llvm/CodeGen/MachineInstrBuilder.h"
20 #include "llvm/CodeGen/MachineRegisterInfo.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/raw_ostream.h"
23 #include "llvm/Target/TargetMachine.h"
24 
25 #define DEBUG_TYPE "si-fold-operands"
26 using namespace llvm;
27 
28 namespace {
29 
30 struct FoldCandidate {
31  MachineInstr *UseMI;
32  union {
33  MachineOperand *OpToFold;
34  uint64_t ImmToFold;
35  int FrameIndexToFold;
36  };
37  int ShrinkOpcode;
38  unsigned char UseOpNo;
39  MachineOperand::MachineOperandType Kind;
40  bool Commuted;
41 
42  FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp,
43  bool Commuted_ = false,
44  int ShrinkOp = -1) :
45  UseMI(MI), OpToFold(nullptr), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
46  Kind(FoldOp->getType()),
47  Commuted(Commuted_) {
48  if (FoldOp->isImm()) {
49  ImmToFold = FoldOp->getImm();
50  } else if (FoldOp->isFI()) {
51  FrameIndexToFold = FoldOp->getIndex();
52  } else {
53  assert(FoldOp->isReg());
54  OpToFold = FoldOp;
55  }
56  }
57 
58  bool isFI() const {
59  return Kind == MachineOperand::MO_FrameIndex;
60  }
61 
62  bool isImm() const {
63  return Kind == MachineOperand::MO_Immediate;
64  }
65 
66  bool isReg() const {
67  return Kind == MachineOperand::MO_Register;
68  }
69 
70  bool isCommuted() const {
71  return Commuted;
72  }
73 
74  bool needsShrink() const {
75  return ShrinkOpcode != -1;
76  }
77 
78  int getShrinkOpcode() const {
79  return ShrinkOpcode;
80  }
81 };
82 
83 class SIFoldOperands : public MachineFunctionPass {
84 public:
85  static char ID;
86  MachineRegisterInfo *MRI;
87  const SIInstrInfo *TII;
88  const SIRegisterInfo *TRI;
89  const GCNSubtarget *ST;
90 
91  void foldOperand(MachineOperand &OpToFold,
92  MachineInstr *UseMI,
93  unsigned UseOpIdx,
94  SmallVectorImpl<FoldCandidate> &FoldList,
95  SmallVectorImpl<MachineInstr *> &CopiesToReplace) const;
96 
97  void foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const;
98 
99  const MachineOperand *isClamp(const MachineInstr &MI) const;
100  bool tryFoldClamp(MachineInstr &MI);
101 
102  std::pair<const MachineOperand *, int> isOMod(const MachineInstr &MI) const;
103  bool tryFoldOMod(MachineInstr &MI);
104 
105 public:
106  SIFoldOperands() : MachineFunctionPass(ID) {
107  initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
108  }
109 
110  bool runOnMachineFunction(MachineFunction &MF) override;
111 
112  StringRef getPassName() const override { return "SI Fold Operands"; }
113 
114  void getAnalysisUsage(AnalysisUsage &AU) const override {
115  AU.setPreservesCFG();
116  MachineFunctionPass::getAnalysisUsage(AU);
117  }
118 };
119 
120 } // End anonymous namespace.
121 
122 INITIALIZE_PASS(SIFoldOperands, DEBUG_TYPE,
123  "SI Fold Operands", false, false)
124 
125 char SIFoldOperands::ID = 0;
126 
127 char &llvm::SIFoldOperandsID = SIFoldOperands::ID;
128 
129 // Wrapper around isInlineConstant that understands special cases when
130 // instruction types are replaced during operand folding.
131 static bool isInlineConstantIfFolded(const SIInstrInfo *TII,
132  const MachineInstr &UseMI,
133  unsigned OpNo,
134  const MachineOperand &OpToFold) {
135  if (TII->isInlineConstant(UseMI, OpNo, OpToFold))
136  return true;
137 
138  unsigned Opc = UseMI.getOpcode();
139  switch (Opc) {
140  case AMDGPU::V_MAC_F32_e64:
141  case AMDGPU::V_MAC_F16_e64:
142  case AMDGPU::V_FMAC_F32_e64: {
143  // Special case for mac. Since this is replaced with mad when folded into
144  // src2, we need to check the legality for the final instruction.
145  int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
146  if (static_cast<int>(OpNo) == Src2Idx) {
147  bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64;
148  bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64;
149 
150  unsigned Opc = IsFMA ?
151  AMDGPU::V_FMA_F32 : (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
152  const MCInstrDesc &MadDesc = TII->get(Opc);
153  return TII->isInlineConstant(OpToFold, MadDesc.OpInfo[OpNo].OperandType);
154  }
155  return false;
156  }
157  default:
158  return false;
159  }
160 }
161 
162 FunctionPass *llvm::createSIFoldOperandsPass() {
163  return new SIFoldOperands();
164 }
165 
166 static bool updateOperand(FoldCandidate &Fold,
167  const SIInstrInfo &TII,
168  const TargetRegisterInfo &TRI) {
169  MachineInstr *MI = Fold.UseMI;
170  MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
171  assert(Old.isReg());
172 
173  if (Fold.isImm()) {
174  if (MI->getDesc().TSFlags & SIInstrFlags::IsPacked) {
175  // Set op_sel/op_sel_hi on this operand or bail out if op_sel is
176  // already set.
177  unsigned Opcode = MI->getOpcode();
178  int OpNo = MI->getOperandNo(&Old);
179  int ModIdx = -1;
180  if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0))
181  ModIdx = AMDGPU::OpName::src0_modifiers;
182  else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1))
183  ModIdx = AMDGPU::OpName::src1_modifiers;
184  else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2))
185  ModIdx = AMDGPU::OpName::src2_modifiers;
186  assert(ModIdx != -1);
187  ModIdx = AMDGPU::getNamedOperandIdx(Opcode, ModIdx);
188  MachineOperand &Mod = MI->getOperand(ModIdx);
189  unsigned Val = Mod.getImm();
190  if ((Val & SISrcMods::OP_SEL_0) || !(Val & SISrcMods::OP_SEL_1))
191  return false;
192  // If upper part is all zero we do not need op_sel_hi.
193  if (!isUInt<16>(Fold.ImmToFold)) {
194  if (!(Fold.ImmToFold & 0xffff)) {
195  Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0);
196  Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
197  Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff);
198  return true;
199  }
200  Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
201  }
202  }
203 
204  if (Fold.needsShrink()) {
205  MachineBasicBlock *MBB = MI->getParent();
206  auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI);
207  if (Liveness != MachineBasicBlock::LQR_Dead)
208  return false;
209 
210  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
211  int Op32 = Fold.getShrinkOpcode();
212  MachineOperand &Dst0 = MI->getOperand(0);
213  MachineOperand &Dst1 = MI->getOperand(1);
214  assert(Dst0.isDef() && Dst1.isDef());
215 
216  bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg());
217 
218  const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
219  unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
220  const TargetRegisterClass *Dst1RC = MRI.getRegClass(Dst1.getReg());
221  unsigned NewReg1 = MRI.createVirtualRegister(Dst1RC);
222 
223  MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
224 
225  if (HaveNonDbgCarryUse) {
226  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg())
227  .addReg(AMDGPU::VCC, RegState::Kill);
228  }
229 
230  // Keep the old instruction around to avoid breaking iterators, but
231  // replace the outputs with dummy registers.
232  Dst0.setReg(NewReg0);
233  Dst1.setReg(NewReg1);
234 
235  if (Fold.isCommuted())
236  TII.commuteInstruction(*Inst32, false);
237  return true;
238  }
239 
240  Old.ChangeToImmediate(Fold.ImmToFold);
241  return true;
242  }
243 
244  assert(!Fold.needsShrink() && "not handled");
245 
246  if (Fold.isFI()) {
247  Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
248  return true;
249  }
250 
251  MachineOperand *New = Fold.OpToFold;
252  if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
253  TargetRegisterInfo::isVirtualRegister(New->getReg())) {
254  Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);
255 
256  Old.setIsUndef(New->isUndef());
257  return true;
258  }
259 
260  // FIXME: Handle physical registers.
261 
262  return false;
263 }
264 
265 static bool isUseMIInFoldList(ArrayRef<FoldCandidate> FoldList,
266  const MachineInstr *MI) {
267  for (auto Candidate : FoldList) {
268  if (Candidate.UseMI == MI)
269  return true;
270  }
271  return false;
272 }
273 
274 static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
275  MachineInstr *MI, unsigned OpNo,
276  MachineOperand *OpToFold,
277  const SIInstrInfo *TII) {
278  if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {
279 
280  // Special case for v_mac_{f16, f32}_e64 if we are trying to fold into src2
281  unsigned Opc = MI->getOpcode();
282  if ((Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
283  Opc == AMDGPU::V_FMAC_F32_e64) &&
284  (int)OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)) {
285  bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64;
286  bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64;
287  unsigned NewOpc = IsFMA ?
288  AMDGPU::V_FMA_F32 : (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
289 
290  // Check if changing this to a v_mad_{f16, f32} instruction will allow us
291  // to fold the operand.
292  MI->setDesc(TII->get(NewOpc));
293  bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold, TII);
294  if (FoldAsMAD) {
295  MI->untieRegOperand(OpNo);
296  return true;
297  }
298  MI->setDesc(TII->get(Opc));
299  }
300 
301  // Special case for s_setreg_b32
302  if (Opc == AMDGPU::S_SETREG_B32 && OpToFold->isImm()) {
303  MI->setDesc(TII->get(AMDGPU::S_SETREG_IMM32_B32));
304  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
305  return true;
306  }
307 
308  // If we are already folding into another operand of MI, then
309  // we can't commute the instruction, otherwise we risk making the
310  // other fold illegal.
311  if (isUseMIInFoldList(FoldList, MI))
312  return false;
313 
314  unsigned CommuteOpNo = OpNo;
315 
316  // Operand is not legal, so try to commute the instruction to
317  // see if this makes it possible to fold.
318  unsigned CommuteIdx0 = TargetInstrInfo::CommuteAnyOperandIndex;
319  unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
320  bool CanCommute = TII->findCommutedOpIndices(*MI, CommuteIdx0, CommuteIdx1);
321 
322  if (CanCommute) {
323  if (CommuteIdx0 == OpNo)
324  CommuteOpNo = CommuteIdx1;
325  else if (CommuteIdx1 == OpNo)
326  CommuteOpNo = CommuteIdx0;
327  }
328 
329 
330  // One of the operands might be an Imm operand, and OpNo may refer to it after
331  // the call of commuteInstruction() below. Such situations are avoided
332  // here explicitly as OpNo must be a register operand to be a candidate
333  // for memory folding.
334  if (CanCommute && (!MI->getOperand(CommuteIdx0).isReg() ||
335  !MI->getOperand(CommuteIdx1).isReg()))
336  return false;
337 
338  if (!CanCommute ||
339  !TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1))
340  return false;
341 
342  if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) {
343  if ((Opc == AMDGPU::V_ADD_I32_e64 ||
344  Opc == AMDGPU::V_SUB_I32_e64 ||
345  Opc == AMDGPU::V_SUBREV_I32_e64) && // FIXME
346  OpToFold->isImm()) {
347  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
348 
349  // Verify the other operand is a VGPR, otherwise we would violate the
350  // constant bus restriction.
351  unsigned OtherIdx = CommuteOpNo == CommuteIdx0 ? CommuteIdx1 : CommuteIdx0;
352  MachineOperand &OtherOp = MI->getOperand(OtherIdx);
353  if (!OtherOp.isReg() ||
354  !TII->getRegisterInfo().isVGPR(MRI, OtherOp.getReg()))
355  return false;
356 
357  assert(MI->getOperand(1).isDef());
358 
359  int Op32 = AMDGPU::getVOPe32(Opc);
360  FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true,
361  Op32));
362  return true;
363  }
364 
365  TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1);
366  return false;
367  }
368 
369  FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true));
370  return true;
371  }
372 
373  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
374  return true;
375 }
376 
377 // If the use operand doesn't care about the value, this may be an operand only
378 // used for register indexing, in which case it is unsafe to fold.
379 static bool isUseSafeToFold(const SIInstrInfo *TII,
380  const MachineInstr &MI,
381  const MachineOperand &UseMO) {
382  return !UseMO.isUndef() && !TII->isSDWA(MI);
383  //return !MI.hasRegisterImplicitUseOperand(UseMO.getReg());
384 }
385 
386 void SIFoldOperands::foldOperand(
387  MachineOperand &OpToFold,
388  MachineInstr *UseMI,
389  unsigned UseOpIdx,
390  SmallVectorImpl<FoldCandidate> &FoldList,
391  SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
392  const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
393 
394  if (!isUseSafeToFold(TII, *UseMI, UseOp))
395  return;
396 
397  // FIXME: Fold operands with subregs.
398  if (UseOp.isReg() && OpToFold.isReg()) {
399  if (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister)
400  return;
401 
402  // Don't fold subregister extracts into tied operands; only fold a full
403  // copy, since a subregister use tied to a full register def doesn't really
404  // make sense. e.g. don't fold:
405  //
406  // %1 = COPY %0:sub1
407  // %2<tied3> = V_MAC_{F16, F32} %3, %4, %1<tied0>
408  //
409  // into
410  // %2<tied3> = V_MAC_{F16, F32} %3, %4, %0:sub1<tied0>
411  if (UseOp.isTied() && OpToFold.getSubReg() != AMDGPU::NoSubRegister)
412  return;
413  }
414 
415  // Special case for REG_SEQUENCE: We can't fold literals into
416  // REG_SEQUENCE instructions, so we have to fold them into the
417  // uses of REG_SEQUENCE.
418  if (UseMI->isRegSequence()) {
419  unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
420  unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
421 
422  for (MachineRegisterInfo::use_iterator
423  RSUse = MRI->use_begin(RegSeqDstReg), RSE = MRI->use_end();
424  RSUse != RSE; ++RSUse) {
425 
426  MachineInstr *RSUseMI = RSUse->getParent();
427  if (RSUse->getSubReg() != RegSeqDstSubReg)
428  continue;
429 
430  foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
431  CopiesToReplace);
432  }
433 
434  return;
435  }
436 
437 
438  bool FoldingImm = OpToFold.isImm();
439 
440  if (FoldingImm && UseMI->isCopy()) {
441  unsigned DestReg = UseMI->getOperand(0).getReg();
442  const TargetRegisterClass *DestRC
443  = TargetRegisterInfo::isVirtualRegister(DestReg) ?
444  MRI->getRegClass(DestReg) :
445  TRI->getPhysRegClass(DestReg);
446 
447  unsigned SrcReg = UseMI->getOperand(1).getReg();
448  if (TargetRegisterInfo::isVirtualRegister(DestReg) &&
449  TargetRegisterInfo::isVirtualRegister(SrcReg)) {
450  const TargetRegisterClass * SrcRC = MRI->getRegClass(SrcReg);
451  if (TRI->isSGPRClass(SrcRC) && TRI->hasVGPRs(DestRC)) {
452  MachineRegisterInfo::use_iterator NextUse;
453  SmallVector<FoldCandidate, 4> CopyUses;
454  for (MachineRegisterInfo::use_iterator
455  Use = MRI->use_begin(DestReg), E = MRI->use_end();
456  Use != E; Use = NextUse) {
457  NextUse = std::next(Use);
458  FoldCandidate FC = FoldCandidate(Use->getParent(),
459  Use.getOperandNo(), &UseMI->getOperand(1));
460  CopyUses.push_back(FC);
461  }
462  for (auto & F : CopyUses) {
463  foldOperand(*F.OpToFold, F.UseMI, F.UseOpNo,
464  FoldList, CopiesToReplace);
465  }
466  }
467  }
468 
469  // In order to fold immediates into copies, we need to change the
470  // copy to a MOV.
471 
472  unsigned MovOp = TII->getMovOpcode(DestRC);
473  if (MovOp == AMDGPU::COPY)
474  return;
475 
476  UseMI->setDesc(TII->get(MovOp));
477  CopiesToReplace.push_back(UseMI);
478  } else {
479  if (UseMI->isCopy() && OpToFold.isReg() &&
480  TargetRegisterInfo::isVirtualRegister(UseMI->getOperand(0).getReg()) &&
481  TargetRegisterInfo::isVirtualRegister(UseMI->getOperand(1).getReg()) &&
482  TRI->isVGPR(*MRI, UseMI->getOperand(0).getReg()) &&
483  TRI->isVGPR(*MRI, UseMI->getOperand(1).getReg()) &&
484  !UseMI->getOperand(1).getSubReg()) {
485  UseMI->getOperand(1).setReg(OpToFold.getReg());
486  UseMI->getOperand(1).setSubReg(OpToFold.getSubReg());
487  UseMI->getOperand(1).setIsKill(false);
488  CopiesToReplace.push_back(UseMI);
489  OpToFold.setIsKill(false);
490  return;
491  }
492 
493  const MCInstrDesc &UseDesc = UseMI->getDesc();
494 
495  // Don't fold into target independent nodes. Target independent opcodes
496  // don't have defined register classes.
497  if (UseDesc.isVariadic() ||
498  UseOp.isImplicit() ||
499  UseDesc.OpInfo[UseOpIdx].RegClass == -1)
500  return;
501  }
502 
503  if (!FoldingImm) {
504  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
505 
506  // FIXME: We could try to change the instruction from 64-bit to 32-bit
507  // to enable more folding opportunities. The shrink operands pass
508  // already does this.
509  return;
510  }
511 
512 
513  const MCInstrDesc &FoldDesc = OpToFold.getParent()->getDesc();
514  const TargetRegisterClass *FoldRC =
515  TRI->getRegClass(FoldDesc.OpInfo[0].RegClass);
516 
517 
518  // Split 64-bit constants into 32-bits for folding.
519  if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) {
520  unsigned UseReg = UseOp.getReg();
521  const TargetRegisterClass *UseRC
522  = TargetRegisterInfo::isVirtualRegister(UseReg) ?
523  MRI->getRegClass(UseReg) :
524  TRI->getPhysRegClass(UseReg);
525 
526  if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64)
527  return;
528 
529  APInt Imm(64, OpToFold.getImm());
530  if (UseOp.getSubReg() == AMDGPU::sub0) {
531  Imm = Imm.getLoBits(32);
532  } else {
533  assert(UseOp.getSubReg() == AMDGPU::sub1);
534  Imm = Imm.getHiBits(32);
535  }
536 
537  MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
538  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
539  return;
540  }
541 
542 
543 
544  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
545 }
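The 64-bit split near the end of foldOperand is plain integer slicing. Below is a minimal standalone sketch of the same arithmetic without LLVM's APInt; every name in it is local to the example and not part of the pass.

  #include <cassert>
  #include <cstdint>

  int main() {
    // Mirrors the sub0/sub1 handling in foldOperand: a use of the low half
    // (sub0) folds the low 32 bits of the 64-bit immediate, a use of the
    // high half (sub1) folds the high 32 bits.
    uint64_t Imm = 0x0000000100000002ULL;
    uint32_t Sub0 = static_cast<uint32_t>(Imm);       // low 32 bits
    uint32_t Sub1 = static_cast<uint32_t>(Imm >> 32); // high 32 bits
    assert(Sub0 == 2u && Sub1 == 1u);
    return 0;
  }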
546 
547 static bool evalBinaryInstruction(unsigned Opcode, int32_t &Result,
548  uint32_t LHS, uint32_t RHS) {
549  switch (Opcode) {
550  case AMDGPU::V_AND_B32_e64:
551  case AMDGPU::V_AND_B32_e32:
552  case AMDGPU::S_AND_B32:
553  Result = LHS & RHS;
554  return true;
555  case AMDGPU::V_OR_B32_e64:
556  case AMDGPU::V_OR_B32_e32:
557  case AMDGPU::S_OR_B32:
558  Result = LHS | RHS;
559  return true;
560  case AMDGPU::V_XOR_B32_e64:
561  case AMDGPU::V_XOR_B32_e32:
562  case AMDGPU::S_XOR_B32:
563  Result = LHS ^ RHS;
564  return true;
565  case AMDGPU::V_LSHL_B32_e64:
566  case AMDGPU::V_LSHL_B32_e32:
567  case AMDGPU::S_LSHL_B32:
568  // The instruction ignores the high bits for out of bounds shifts.
569  Result = LHS << (RHS & 31);
570  return true;
571  case AMDGPU::V_LSHLREV_B32_e64:
572  case AMDGPU::V_LSHLREV_B32_e32:
573  Result = RHS << (LHS & 31);
574  return true;
575  case AMDGPU::V_LSHR_B32_e64:
576  case AMDGPU::V_LSHR_B32_e32:
577  case AMDGPU::S_LSHR_B32:
578  Result = LHS >> (RHS & 31);
579  return true;
580  case AMDGPU::V_LSHRREV_B32_e64:
581  case AMDGPU::V_LSHRREV_B32_e32:
582  Result = RHS >> (LHS & 31);
583  return true;
584  case AMDGPU::V_ASHR_I32_e64:
585  case AMDGPU::V_ASHR_I32_e32:
586  case AMDGPU::S_ASHR_I32:
587  Result = static_cast<int32_t>(LHS) >> (RHS & 31);
588  return true;
589  case AMDGPU::V_ASHRREV_I32_e64:
590  case AMDGPU::V_ASHRREV_I32_e32:
591  Result = static_cast<int32_t>(RHS) >> (LHS & 31);
592  return true;
593  default:
594  return false;
595  }
596 }
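As a sanity check on the shift cases in evalBinaryInstruction, the 5-bit masking can be reproduced outside LLVM. A minimal standalone sketch (nothing in it is part of the pass):

  #include <cassert>
  #include <cstdint>

  int main() {
    // Mirrors the S_LSHL_B32/V_LSHL_B32 case above: only the low 5 bits of
    // the shift amount are used, so an "out of bounds" shift by 35 behaves
    // like a shift by 3.
    uint32_t LHS = 1, RHS = 35;
    int32_t Result = static_cast<int32_t>(LHS << (RHS & 31));
    assert(Result == 8);
    return 0;
  }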
597 
598 static unsigned getMovOpc(bool IsScalar) {
599  return IsScalar ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
600 }
601 
602 /// Remove any leftover implicit operands from mutating the instruction. e.g.
603 /// if we replace an s_and_b32 with a copy, we don't need the implicit scc def
604 /// anymore.
605 static void stripExtraCopyOperands(MachineInstr &MI) {
606  const MCInstrDesc &Desc = MI.getDesc();
607  unsigned NumOps = Desc.getNumOperands() +
608  Desc.getNumImplicitUses() +
609  Desc.getNumImplicitDefs();
610 
611  for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
612  MI.RemoveOperand(I);
613 }
614 
615 static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) {
616  MI.setDesc(NewDesc);
617  stripExtraCopyOperands(MI);
618 }
619 
620 static MachineOperand *getImmOrMaterializedImm(MachineRegisterInfo &MRI,
621  MachineOperand &Op) {
622  if (Op.isReg()) {
623  // If this has a subregister, it obviously is a register source.
624  if (Op.getSubReg() != AMDGPU::NoSubRegister ||
625  !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
626  return &Op;
627 
627 
628  MachineInstr *Def = MRI.getVRegDef(Op.getReg());
629  if (Def && Def->isMoveImmediate()) {
630  MachineOperand &ImmSrc = Def->getOperand(1);
631  if (ImmSrc.isImm())
632  return &ImmSrc;
633  }
634  }
635 
636  return &Op;
637 }
638 
639 // Try to simplify operations with a constant that may appear after instruction
640 // selection.
641 // TODO: See if a frame index with a fixed offset can fold.
642 static bool tryConstantFoldOp(MachineRegisterInfo &MRI,
643  const SIInstrInfo *TII,
644  MachineInstr *MI,
645  MachineOperand *ImmOp) {
646  unsigned Opc = MI->getOpcode();
647  if (Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
648  Opc == AMDGPU::S_NOT_B32) {
649  MI->getOperand(1).ChangeToImmediate(~ImmOp->getImm());
650  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_NOT_B32)));
651  return true;
652  }
653 
654  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
655  if (Src1Idx == -1)
656  return false;
657 
658  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
659  MachineOperand *Src0 = getImmOrMaterializedImm(MRI, MI->getOperand(Src0Idx));
660  MachineOperand *Src1 = getImmOrMaterializedImm(MRI, MI->getOperand(Src1Idx));
661 
662  if (!Src0->isImm() && !Src1->isImm())
663  return false;
664 
665  if (MI->getOpcode() == AMDGPU::V_LSHL_OR_B32) {
666  if (Src0->isImm() && Src0->getImm() == 0) {
667  // v_lshl_or_b32 0, X, Y -> copy Y
668  // v_lshl_or_b32 0, X, K -> v_mov_b32 K
669  bool UseCopy = TII->getNamedOperand(*MI, AMDGPU::OpName::src2)->isReg();
670  MI->RemoveOperand(Src1Idx);
671  MI->RemoveOperand(Src0Idx);
672 
673  MI->setDesc(TII->get(UseCopy ? AMDGPU::COPY : AMDGPU::V_MOV_B32_e32));
674  return true;
675  }
676  }
677 
678  // and k0, k1 -> v_mov_b32 (k0 & k1)
679  // or k0, k1 -> v_mov_b32 (k0 | k1)
680  // xor k0, k1 -> v_mov_b32 (k0 ^ k1)
681  if (Src0->isImm() && Src1->isImm()) {
682  int32_t NewImm;
683  if (!evalBinaryInstruction(Opc, NewImm, Src0->getImm(), Src1->getImm()))
684  return false;
685 
686  const SIRegisterInfo &TRI = TII->getRegisterInfo();
687  bool IsSGPR = TRI.isSGPRReg(MRI, MI->getOperand(0).getReg());
688 
689  // Be careful to change the right operand, src0 may belong to a different
690  // instruction.
691  MI->getOperand(Src0Idx).ChangeToImmediate(NewImm);
692  MI->RemoveOperand(Src1Idx);
693  mutateCopyOp(*MI, TII->get(getMovOpc(IsSGPR)));
694  return true;
695  }
696 
697  if (!MI->isCommutable())
698  return false;
699 
700  if (Src0->isImm() && !Src1->isImm()) {
701  std::swap(Src0, Src1);
702  std::swap(Src0Idx, Src1Idx);
703  }
704 
705  int32_t Src1Val = static_cast<int32_t>(Src1->getImm());
706  if (Opc == AMDGPU::V_OR_B32_e64 ||
707  Opc == AMDGPU::V_OR_B32_e32 ||
708  Opc == AMDGPU::S_OR_B32) {
709  if (Src1Val == 0) {
710  // y = or x, 0 => y = copy x
711  MI->RemoveOperand(Src1Idx);
712  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
713  } else if (Src1Val == -1) {
714  // y = or x, -1 => y = v_mov_b32 -1
715  MI->RemoveOperand(Src1Idx);
716  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_OR_B32)));
717  } else
718  return false;
719 
720  return true;
721  }
722 
723  if (MI->getOpcode() == AMDGPU::V_AND_B32_e64 ||
724  MI->getOpcode() == AMDGPU::V_AND_B32_e32 ||
725  MI->getOpcode() == AMDGPU::S_AND_B32) {
726  if (Src1Val == 0) {
727  // y = and x, 0 => y = v_mov_b32 0
728  MI->RemoveOperand(Src0Idx);
729  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_AND_B32)));
730  } else if (Src1Val == -1) {
731  // y = and x, -1 => y = copy x
732  MI->RemoveOperand(Src1Idx);
733  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
734  stripExtraCopyOperands(*MI);
735  } else
736  return false;
737 
738  return true;
739  }
740 
741  if (MI->getOpcode() == AMDGPU::V_XOR_B32_e64 ||
742  MI->getOpcode() == AMDGPU::V_XOR_B32_e32 ||
743  MI->getOpcode() == AMDGPU::S_XOR_B32) {
744  if (Src1Val == 0) {
745  // y = xor x, 0 => y = copy x
746  MI->RemoveOperand(Src1Idx);
747  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
748  return true;
749  }
750  }
751 
752  return false;
753 }
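The single-known-operand cases in tryConstantFoldOp rely on ordinary bitwise identities. A standalone sketch of the identities being exploited, assuming two's-complement int32_t as the pass does:

  #include <cassert>
  #include <cstdint>

  int main() {
    int32_t X = 0x12345678;
    assert((X | 0) == X);   // or  x, 0  -> copy x
    assert((X | -1) == -1); // or  x, -1 -> mov -1
    assert((X & 0) == 0);   // and x, 0  -> mov 0
    assert((X & -1) == X);  // and x, -1 -> copy x
    assert((X ^ 0) == X);   // xor x, 0  -> copy x
    return 0;
  }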
754 
755 // Try to fold an instruction into a simpler one
756 static bool tryFoldInst(const SIInstrInfo *TII,
757  MachineInstr *MI) {
758  unsigned Opc = MI->getOpcode();
759 
760  if (Opc == AMDGPU::V_CNDMASK_B32_e32 ||
761  Opc == AMDGPU::V_CNDMASK_B32_e64 ||
762  Opc == AMDGPU::V_CNDMASK_B64_PSEUDO) {
763  const MachineOperand *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
764  const MachineOperand *Src1 = TII->getNamedOperand(*MI, AMDGPU::OpName::src1);
765  int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
766  int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
767  if (Src1->isIdenticalTo(*Src0) &&
768  (Src1ModIdx == -1 || !MI->getOperand(Src1ModIdx).getImm()) &&
769  (Src0ModIdx == -1 || !MI->getOperand(Src0ModIdx).getImm())) {
770  LLVM_DEBUG(dbgs() << "Folded " << *MI << " into ");
771  auto &NewDesc =
772  TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY : getMovOpc(false));
773  int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
774  if (Src2Idx != -1)
775  MI->RemoveOperand(Src2Idx);
776  MI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1));
777  if (Src1ModIdx != -1)
778  MI->RemoveOperand(Src1ModIdx);
779  if (Src0ModIdx != -1)
780  MI->RemoveOperand(Src0ModIdx);
781  mutateCopyOp(*MI, NewDesc);
782  LLVM_DEBUG(dbgs() << *MI << '\n');
783  return true;
784  }
785  }
786 
787  return false;
788 }
789 
790 void SIFoldOperands::foldInstOperand(MachineInstr &MI,
791  MachineOperand &OpToFold) const {
792  // We need to mutate the operands of new mov instructions to add implicit
793  // uses of EXEC, but adding them invalidates the use_iterator, so defer
794  // this.
795  SmallVector<MachineInstr *, 4> CopiesToReplace;
796  SmallVector<FoldCandidate, 4> FoldList;
797  MachineOperand &Dst = MI.getOperand(0);
798 
799  bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
800  if (FoldingImm) {
801  unsigned NumLiteralUses = 0;
802  MachineOperand *NonInlineUse = nullptr;
803  int NonInlineUseOpNo = -1;
804 
805  MachineRegisterInfo::use_iterator NextUse;
806  for (MachineRegisterInfo::use_iterator
807  Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
808  Use != E; Use = NextUse) {
809  NextUse = std::next(Use);
810  MachineInstr *UseMI = Use->getParent();
811  unsigned OpNo = Use.getOperandNo();
812 
813  // Folding the immediate may reveal operations that can be constant
814  // folded or replaced with a copy. This can happen for example after
815  // frame indices are lowered to constants or from splitting 64-bit
816  // constants.
817  //
818  // We may also encounter cases where one or both operands are
819  // immediates materialized into a register, which would ordinarily not
820  // be folded due to multiple uses or operand constraints.
821 
822  if (OpToFold.isImm() && tryConstantFoldOp(*MRI, TII, UseMI, &OpToFold)) {
823  LLVM_DEBUG(dbgs() << "Constant folded " << *UseMI << '\n');
824 
825  // Some constant folding cases change the same immediate's use to a new
826  // instruction, e.g. and x, 0 -> 0. Make sure we re-visit the user
827  // again. The same constant folded instruction could also have a second
828  // use operand.
829  NextUse = MRI->use_begin(Dst.getReg());
830  FoldList.clear();
831  continue;
832  }
833 
834  // Try to fold any inline immediate uses, and then only fold other
835  // constants if they have one use.
836  //
837  // The legality of the inline immediate must be checked based on the use
838  // operand, not the defining instruction, because 32-bit instructions
839  // with 32-bit inline immediate sources may be used to materialize
840  // constants used in 16-bit operands.
841  //
842  // e.g. it is unsafe to fold:
843  // s_mov_b32 s0, 1.0 // materializes 0x3f800000
844  // v_add_f16 v0, v1, s0 // 1.0 f16 inline immediate sees 0x00003c00
845 
846  // Folding immediates with more than one use will increase program size.
847  // FIXME: This will also reduce register usage, which may be better
848  // in some cases. A better heuristic is needed.
849  if (isInlineConstantIfFolded(TII, *UseMI, OpNo, OpToFold)) {
850  foldOperand(OpToFold, UseMI, OpNo, FoldList, CopiesToReplace);
851  } else {
852  if (++NumLiteralUses == 1) {
853  NonInlineUse = &*Use;
854  NonInlineUseOpNo = OpNo;
855  }
856  }
857  }
858 
859  if (NumLiteralUses == 1) {
860  MachineInstr *UseMI = NonInlineUse->getParent();
861  foldOperand(OpToFold, UseMI, NonInlineUseOpNo, FoldList, CopiesToReplace);
862  }
863  } else {
864  // Folding register.
865  SmallVector<MachineRegisterInfo::use_iterator, 4> UsesToProcess;
866  for (MachineRegisterInfo::use_iterator
867  Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
868  Use != E; ++Use) {
869  UsesToProcess.push_back(Use);
870  }
871  for (auto U : UsesToProcess) {
872  MachineInstr *UseMI = U->getParent();
873 
874  foldOperand(OpToFold, UseMI, U.getOperandNo(),
875  FoldList, CopiesToReplace);
876  }
877  }
878 
879  MachineFunction *MF = MI.getParent()->getParent();
880  // Make sure we add EXEC uses to any new v_mov instructions created.
881  for (MachineInstr *Copy : CopiesToReplace)
882  Copy->addImplicitDefUseOperands(*MF);
883 
884  for (FoldCandidate &Fold : FoldList) {
885  if (updateOperand(Fold, *TII, *TRI)) {
886  // Clear kill flags.
887  if (Fold.isReg()) {
888  assert(Fold.OpToFold && Fold.OpToFold->isReg());
889  // FIXME: Probably shouldn't bother trying to fold if not an
890  // SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR
891  // copies.
892  MRI->clearKillFlags(Fold.OpToFold->getReg());
893  }
894  LLVM_DEBUG(dbgs() << "Folded source from " << MI << " into OpNo "
895  << static_cast<int>(Fold.UseOpNo) << " of "
896  << *Fold.UseMI << '\n');
897  tryFoldInst(TII, Fold.UseMI);
898  } else if (Fold.isCommuted()) {
899  // Restoring instruction's original operand order if fold has failed.
900  TII->commuteInstruction(*Fold.UseMI, false);
901  }
902  }
903 }
904 
905 // Clamp patterns are canonically selected to v_max_* instructions, so only
906 // handle them.
907 const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
908  unsigned Op = MI.getOpcode();
909  switch (Op) {
910  case AMDGPU::V_MAX_F32_e64:
911  case AMDGPU::V_MAX_F16_e64:
912  case AMDGPU::V_MAX_F64:
913  case AMDGPU::V_PK_MAX_F16: {
914  if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())
915  return nullptr;
916 
917  // Make sure sources are identical.
918  const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
919  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
920  if (!Src0->isReg() || !Src1->isReg() ||
921  Src0->getReg() != Src1->getReg() ||
922  Src0->getSubReg() != Src1->getSubReg() ||
923  Src0->getSubReg() != AMDGPU::NoSubRegister)
924  return nullptr;
925 
926  // Can't fold up if we have modifiers.
927  if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
928  return nullptr;
929 
930  unsigned Src0Mods
931  = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm();
932  unsigned Src1Mods
933  = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm();
934 
935  // Having a 0 op_sel_hi would require swizzling the output in the source
936  // instruction, which we can't do.
937  unsigned UnsetMods = (Op == AMDGPU::V_PK_MAX_F16) ? SISrcMods::OP_SEL_1
938  : 0u;
939  if (Src0Mods != UnsetMods && Src1Mods != UnsetMods)
940  return nullptr;
941  return Src0;
942  }
943  default:
944  return nullptr;
945  }
946 }
947 
948 // We obviously have multiple uses in a clamp since the register is used twice
949 // in the same instruction.
950 static bool hasOneNonDBGUseInst(const MachineRegisterInfo &MRI, unsigned Reg) {
951  int Count = 0;
952  for (auto I = MRI.use_instr_nodbg_begin(Reg), E = MRI.use_instr_nodbg_end();
953  I != E; ++I) {
954  if (++Count > 1)
955  return false;
956  }
957 
958  return true;
959 }
960 
961 // FIXME: Clamp for v_mad_mixhi_f16 handled during isel.
962 bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) {
963  const MachineOperand *ClampSrc = isClamp(MI);
964  if (!ClampSrc || !hasOneNonDBGUseInst(*MRI, ClampSrc->getReg()))
965  return false;
966 
967  MachineInstr *Def = MRI->getVRegDef(ClampSrc->getReg());
968 
969  // The type of clamp must be compatible.
970  if (TII->getClampMask(*Def) != TII->getClampMask(MI))
971  return false;
972 
973  MachineOperand *DefClamp = TII->getNamedOperand(*Def, AMDGPU::OpName::clamp);
974  if (!DefClamp)
975  return false;
976 
977  LLVM_DEBUG(dbgs() << "Folding clamp " << *DefClamp << " into " << *Def
978  << '\n');
979 
980  // Clamp is applied after omod, so it is OK if omod is set.
981  DefClamp->setImm(1);
982  MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
983  MI.eraseFromParent();
984  return true;
985 }
986 
987 static int getOModValue(unsigned Opc, int64_t Val) {
988  switch (Opc) {
989  case AMDGPU::V_MUL_F32_e64: {
990  switch (static_cast<uint32_t>(Val)) {
991  case 0x3f000000: // 0.5
992  return SIOutMods::DIV2;
993  case 0x40000000: // 2.0
994  return SIOutMods::MUL2;
995  case 0x40800000: // 4.0
996  return SIOutMods::MUL4;
997  default:
998  return SIOutMods::NONE;
999  }
1000  }
1001  case AMDGPU::V_MUL_F16_e64: {
1002  switch (static_cast<uint16_t>(Val)) {
1003  case 0x3800: // 0.5
1004  return SIOutMods::DIV2;
1005  case 0x4000: // 2.0
1006  return SIOutMods::MUL2;
1007  case 0x4400: // 4.0
1008  return SIOutMods::MUL4;
1009  default:
1010  return SIOutMods::NONE;
1011  }
1012  }
1013  default:
1014  llvm_unreachable("invalid mul opcode");
1015  }
1016 }
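The f32 cases in getOModValue match raw IEEE-754 bit patterns of the multiplier. A standalone check of those three constants (the f16 cases follow the same idea, but standard C++ has no portable half type, so they are left out here):

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  // Raw bit pattern of a float, as the switch in getOModValue sees it.
  static uint32_t bitsOf(float F) {
    uint32_t Bits;
    std::memcpy(&Bits, &F, sizeof(Bits));
    return Bits;
  }

  int main() {
    assert(bitsOf(0.5f) == 0x3f000000u); // SIOutMods::DIV2
    assert(bitsOf(2.0f) == 0x40000000u); // SIOutMods::MUL2
    assert(bitsOf(4.0f) == 0x40800000u); // SIOutMods::MUL4
    return 0;
  }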
1017 
1018 // FIXME: Does this really not support denormals with f16?
1019 // FIXME: Does this need to check IEEE mode bit? SNaNs are generally not
1020 // handled, so will anything other than that break?
1021 std::pair<const MachineOperand *, int>
1022 SIFoldOperands::isOMod(const MachineInstr &MI) const {
1023  unsigned Op = MI.getOpcode();
1024  switch (Op) {
1025  case AMDGPU::V_MUL_F32_e64:
1026  case AMDGPU::V_MUL_F16_e64: {
1027  // If output denormals are enabled, omod is ignored.
1028  if ((Op == AMDGPU::V_MUL_F32_e64 && ST->hasFP32Denormals()) ||
1029  (Op == AMDGPU::V_MUL_F16_e64 && ST->hasFP16Denormals()))
1030  return std::make_pair(nullptr, SIOutMods::NONE);
1031 
1032  const MachineOperand *RegOp = nullptr;
1033  const MachineOperand *ImmOp = nullptr;
1034  const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1035  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
1036  if (Src0->isImm()) {
1037  ImmOp = Src0;
1038  RegOp = Src1;
1039  } else if (Src1->isImm()) {
1040  ImmOp = Src1;
1041  RegOp = Src0;
1042  } else
1043  return std::make_pair(nullptr, SIOutMods::NONE);
1044 
1045  int OMod = getOModValue(Op, ImmOp->getImm());
1046  if (OMod == SIOutMods::NONE ||
1047  TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
1048  TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
1049  TII->hasModifiersSet(MI, AMDGPU::OpName::omod) ||
1050  TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))
1051  return std::make_pair(nullptr, SIOutMods::NONE);
1052 
1053  return std::make_pair(RegOp, OMod);
1054  }
1055  case AMDGPU::V_ADD_F32_e64:
1056  case AMDGPU::V_ADD_F16_e64: {
1057  // If output denormals are enabled, omod is ignored.
1058  if ((Op == AMDGPU::V_ADD_F32_e64 && ST->hasFP32Denormals()) ||
1059  (Op == AMDGPU::V_ADD_F16_e64 && ST->hasFP16Denormals()))
1060  return std::make_pair(nullptr, SIOutMods::NONE);
1061 
1062  // Look through the DAGCombiner canonicalization fmul x, 2 -> fadd x, x
1063  const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1064  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
1065 
1066  if (Src0->isReg() && Src1->isReg() && Src0->getReg() == Src1->getReg() &&
1067  Src0->getSubReg() == Src1->getSubReg() &&
1068  !TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) &&
1069  !TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) &&
1070  !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) &&
1071  !TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
1072  return std::make_pair(Src0, SIOutMods::MUL2);
1073 
1074  return std::make_pair(nullptr, SIOutMods::NONE);
1075  }
1076  default:
1077  return std::make_pair(nullptr, SIOutMods::NONE);
1078  }
1079 }
1080 
1081 // FIXME: Does this need to check IEEE bit on function?
1082 bool SIFoldOperands::tryFoldOMod(MachineInstr &MI) {
1083  const MachineOperand *RegOp;
1084  int OMod;
1085  std::tie(RegOp, OMod) = isOMod(MI);
1086  if (OMod == SIOutMods::NONE || !RegOp->isReg() ||
1087  RegOp->getSubReg() != AMDGPU::NoSubRegister ||
1088  !hasOneNonDBGUseInst(*MRI, RegOp->getReg()))
1089  return false;
1090 
1091  MachineInstr *Def = MRI->getVRegDef(RegOp->getReg());
1092  MachineOperand *DefOMod = TII->getNamedOperand(*Def, AMDGPU::OpName::omod);
1093  if (!DefOMod || DefOMod->getImm() != SIOutMods::NONE)
1094  return false;
1095 
1096  // Clamp is applied after omod. If the source already has clamp set, don't
1097  // fold it.
1098  if (TII->hasModifiersSet(*Def, AMDGPU::OpName::clamp))
1099  return false;
1100 
1101  LLVM_DEBUG(dbgs() << "Folding omod " << MI << " into " << *Def << '\n');
1102 
1103  DefOMod->setImm(OMod);
1104  MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
1105  MI.eraseFromParent();
1106  return true;
1107 }
1108 
1109 bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
1110  if (skipFunction(MF.getFunction()))
1111  return false;
1112 
1113  MRI = &MF.getRegInfo();
1114  ST = &MF.getSubtarget<GCNSubtarget>();
1115  TII = ST->getInstrInfo();
1116  TRI = &TII->getRegisterInfo();
1117 
1118  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1119 
1120  // omod is ignored by hardware if IEEE bit is enabled. omod also does not
1121  // correctly handle signed zeros.
1122  //
1123  // FIXME: Also need to check strictfp
1124  bool IsIEEEMode = MFI->getMode().IEEE;
1125  bool HasNSZ = MFI->hasNoSignedZerosFPMath();
1126 
1127  for (MachineBasicBlock *MBB : depth_first(&MF)) {
1128  MachineBasicBlock::iterator I, Next;
1129  for (I = MBB->begin(); I != MBB->end(); I = Next) {
1130  Next = std::next(I);
1131  MachineInstr &MI = *I;
1132 
1133  tryFoldInst(TII, &MI);
1134 
1135  if (!TII->isFoldableCopy(MI)) {
1136  // TODO: Omod might be OK if there is NSZ only on the source
1137  // instruction, and not the omod multiply.
1138  if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) ||
1139  !tryFoldOMod(MI))
1140  tryFoldClamp(MI);
1141  continue;
1142  }
1143 
1144  MachineOperand &OpToFold = MI.getOperand(1);
1145  bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
1146 
1147  // FIXME: We could also be folding things like TargetIndexes.
1148  if (!FoldingImm && !OpToFold.isReg())
1149  continue;
1150 
1151  if (OpToFold.isReg() &&
1152  !TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()))
1153  continue;
1154 
1155  // Prevent folding operands backwards in the function. For example,
1156  // the COPY opcode must not be replaced by 1 in this example:
1157  //
1158  // %3 = COPY %vgpr0; VGPR_32:%3
1159  // ...
1160  // %vgpr0 = V_MOV_B32_e32 1, implicit %exec
1161  MachineOperand &Dst = MI.getOperand(0);
1162  if (Dst.isReg() &&
1163  !TargetRegisterInfo::isVirtualRegister(Dst.getReg()))
1164  continue;
1165 
1166  foldInstOperand(MI, OpToFold);
1167  }
1168  }
1169  return false;
1170 }