LLVM  4.0.0
SIFoldOperands.cpp
1 //===-- SIFoldOperands.cpp - Fold operands --- ----------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 /// \file
9 //===----------------------------------------------------------------------===//
10 //
11 
12 #include "AMDGPU.h"
13 #include "AMDGPUSubtarget.h"
14 #include "SIInstrInfo.h"
15 #include "llvm/CodeGen/MachineFunctionPass.h"
16 #include "llvm/CodeGen/MachineInstrBuilder.h"
17 #include "llvm/CodeGen/MachineRegisterInfo.h"
18 #include "llvm/IR/Function.h"
19 #include "llvm/Support/Debug.h"
20 #include "llvm/Support/raw_ostream.h"
21 #include "llvm/Target/TargetMachine.h"
22 
23 #define DEBUG_TYPE "si-fold-operands"
24 using namespace llvm;
25 
26 namespace {
27 
28 struct FoldCandidate {
29  MachineInstr *UseMI;
30  union {
31  MachineOperand *OpToFold;
32  uint64_t ImmToFold;
33  int FrameIndexToFold;
34  };
35  unsigned char UseOpNo;
36  MachineOperand::MachineOperandType Kind;
37 
38  FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
39  UseMI(MI), OpToFold(nullptr), UseOpNo(OpNo), Kind(FoldOp->getType()) {
40  if (FoldOp->isImm()) {
41  ImmToFold = FoldOp->getImm();
42  } else if (FoldOp->isFI()) {
43  FrameIndexToFold = FoldOp->getIndex();
44  } else {
45  assert(FoldOp->isReg());
46  OpToFold = FoldOp;
47  }
48  }
49 
50  bool isFI() const {
51  return Kind == MachineOperand::MO_FrameIndex;
52  }
53 
54  bool isImm() const {
55  return Kind == MachineOperand::MO_Immediate;
56  }
57 
58  bool isReg() const {
59  return Kind == MachineOperand::MO_Register;
60  }
61 };
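// Illustrative usage sketch (editorial addition, not part of the original
// source): when the immediate source of "%vreg0 = V_MOV_B32_e32 42" is folded
// into operand 2 of a user, the candidate is built as
// FoldCandidate(UserMI, /*OpNo=*/2, &MovSrcOperand); because that operand is an
// immediate, 42 is stored in ImmToFold and Kind is MachineOperand::MO_Immediate.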
62 
63 class SIFoldOperands : public MachineFunctionPass {
64 public:
65  static char ID;
66  MachineRegisterInfo *MRI;
67  const SIInstrInfo *TII;
68  const SIRegisterInfo *TRI;
69 
70  void foldOperand(MachineOperand &OpToFold,
71  MachineInstr *UseMI,
72  unsigned UseOpIdx,
73  SmallVectorImpl<FoldCandidate> &FoldList,
74  SmallVectorImpl<MachineInstr *> &CopiesToReplace) const;
75 
76  void foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const;
77 
78 public:
79  SIFoldOperands() : MachineFunctionPass(ID) {
80  initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
81  }
82 
83  bool runOnMachineFunction(MachineFunction &MF) override;
84 
85  StringRef getPassName() const override { return "SI Fold Operands"; }
86 
87  void getAnalysisUsage(AnalysisUsage &AU) const override {
88  AU.setPreservesCFG();
89  MachineFunctionPass::getAnalysisUsage(AU);
90  }
91 };
92 
93 } // End anonymous namespace.
94 
95 INITIALIZE_PASS(SIFoldOperands, DEBUG_TYPE,
96  "SI Fold Operands", false, false)
97 
98 char SIFoldOperands::ID = 0;
99 
100 char &llvm::SIFoldOperandsID = SIFoldOperands::ID;
101 
102 // Wrapper around isInlineConstant that understands special cases when
103 // instruction types are replaced during operand folding.
104 static bool isInlineConstantIfFolded(const SIInstrInfo *TII,
105  const MachineInstr &UseMI,
106  unsigned OpNo,
107  const MachineOperand &OpToFold) {
108  if (TII->isInlineConstant(UseMI, OpNo, OpToFold))
109  return true;
110 
111  unsigned Opc = UseMI.getOpcode();
112  switch (Opc) {
113  case AMDGPU::V_MAC_F32_e64:
114  case AMDGPU::V_MAC_F16_e64: {
115  // Special case for mac. Since this is replaced with mad when folded into
116  // src2, we need to check the legality for the final instruction.
117  int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
118  if (static_cast<int>(OpNo) == Src2Idx) {
119  bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64;
120  const MCInstrDesc &MadDesc
121  = TII->get(IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
122  return TII->isInlineConstant(OpToFold, MadDesc.OpInfo[OpNo].OperandType);
123  }
124  }
125  default:
126  return false;
127  }
128 }
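// Illustrative example (editorial addition, not part of the original source):
// folding an immediate into src2 of
//   %vreg2<tied3> = V_MAC_F32_e64 %vreg3, %vreg4, %vreg1<tied0>
// turns the instruction into V_MAD_F32, so an immediate such as 1.0 must be
// checked against V_MAD_F32's operand description rather than V_MAC_F32_e64's.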
129 
130 FunctionPass *llvm::createSIFoldOperandsPass() {
131  return new SIFoldOperands();
132 }
133 
134 static bool isSafeToFold(const MachineInstr &MI) {
135  switch (MI.getOpcode()) {
136  case AMDGPU::V_MOV_B32_e32:
137  case AMDGPU::V_MOV_B32_e64:
138  case AMDGPU::V_MOV_B64_PSEUDO: {
139  // If there are additional implicit register operands, this may be used for
140  // register indexing so the source register operand isn't simply copied.
141  unsigned NumOps = MI.getDesc().getNumOperands() +
142  MI.getDesc().getNumImplicitUses();
143 
144  return MI.getNumOperands() == NumOps;
145  }
146  case AMDGPU::S_MOV_B32:
147  case AMDGPU::S_MOV_B64:
148  case AMDGPU::COPY:
149  return true;
150  default:
151  return false;
152  }
153 }
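// Illustrative example (editorial addition, not part of the original source):
// a plain move such as
//   %vreg1 = V_MOV_B32_e32 %vreg0, %EXEC<imp-use>
// has exactly the operands listed in its MCInstrDesc plus the implicit EXEC
// use, so getNumOperands() == NumOps and it is safe to fold. A mov carrying
// extra implicit operands for register indexing fails this check and is skipped.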
154 
155 static bool updateOperand(FoldCandidate &Fold,
156  const TargetRegisterInfo &TRI) {
157  MachineInstr *MI = Fold.UseMI;
158  MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
159  assert(Old.isReg());
160 
161  if (Fold.isImm()) {
162  Old.ChangeToImmediate(Fold.ImmToFold);
163  return true;
164  }
165 
166  if (Fold.isFI()) {
167  Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
168  return true;
169  }
170 
171  MachineOperand *New = Fold.OpToFold;
172  if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
173  TargetRegisterInfo::isVirtualRegister(New->getReg())) {
174  Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);
175  return true;
176  }
177 
178  // FIXME: Handle physical registers.
179 
180  return false;
181 }
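// Illustrative example (editorial addition, not part of the original source):
// if %vreg1 was chosen as a fold candidate with ImmToFold = 42, the use operand
// is rewritten in place:
//   %vreg2 = V_OR_B32_e32 %vreg1, %vreg0   -->   %vreg2 = V_OR_B32_e32 42, %vreg0
// Register candidates are instead rewritten with substVirtReg, which carries
// over the source's subregister index.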
182 
183 static bool isUseMIInFoldList(ArrayRef<FoldCandidate> FoldList,
184  const MachineInstr *MI) {
185  for (auto Candidate : FoldList) {
186  if (Candidate.UseMI == MI)
187  return true;
188  }
189  return false;
190 }
191 
192 static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
193  MachineInstr *MI, unsigned OpNo,
194  MachineOperand *OpToFold,
195  const SIInstrInfo *TII) {
196  if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {
197 
198  // Special case for v_mac_{f16, f32}_e64 if we are trying to fold into src2
199  unsigned Opc = MI->getOpcode();
200  if ((Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64) &&
201  (int)OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)) {
202  bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64;
203 
204  // Check if changing this to a v_mad_{f16, f32} instruction will allow us
205  // to fold the operand.
206  MI->setDesc(TII->get(IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16));
207  bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold, TII);
208  if (FoldAsMAD) {
209  MI->untieRegOperand(OpNo);
210  return true;
211  }
212  MI->setDesc(TII->get(Opc));
213  }
214 
215  // Special case for s_setreg_b32
216  if (Opc == AMDGPU::S_SETREG_B32 && OpToFold->isImm()) {
217  MI->setDesc(TII->get(AMDGPU::S_SETREG_IMM32_B32));
218  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
219  return true;
220  }
221 
222  // If we are already folding into another operand of MI, then
223  // we can't commute the instruction, otherwise we risk making the
224  // other fold illegal.
225  if (isUseMIInFoldList(FoldList, MI))
226  return false;
227 
228  // Operand is not legal, so try to commute the instruction to
229  // see if this makes it possible to fold.
230  unsigned CommuteIdx0 = TargetInstrInfo::CommuteAnyOperandIndex;
231  unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
232  bool CanCommute = TII->findCommutedOpIndices(*MI, CommuteIdx0, CommuteIdx1);
233 
234  if (CanCommute) {
235  if (CommuteIdx0 == OpNo)
236  OpNo = CommuteIdx1;
237  else if (CommuteIdx1 == OpNo)
238  OpNo = CommuteIdx0;
239  }
240 
241  // One of the operands might be an Imm operand, and OpNo may refer to it after
242  // the call of commuteInstruction() below. Such situations are avoided
243  // here explicitly as OpNo must be a register operand to be a candidate
244  // for memory folding.
245  if (CanCommute && (!MI->getOperand(CommuteIdx0).isReg() ||
246  !MI->getOperand(CommuteIdx1).isReg()))
247  return false;
248 
249  if (!CanCommute ||
250  !TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1))
251  return false;
252 
253  if (!TII->isOperandLegal(*MI, OpNo, OpToFold))
254  return false;
255  }
256 
257  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
258  return true;
259 }
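// Illustrative example (editorial addition, not part of the original source)
// of the S_SETREG_B32 special case above: folding the materialized immediate in
//   %sgpr0 = S_MOV_B32 7
//   S_SETREG_B32 hwreg(...), %sgpr0
// rewrites the user to the immediate form
//   S_SETREG_IMM32_B32 hwreg(...), 7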
260 
261 // If the use operand doesn't care about the value, this may be an operand only
262 // used for register indexing, in which case it is unsafe to fold.
263 static bool isUseSafeToFold(const MachineInstr &MI,
264  const MachineOperand &UseMO) {
265  return !UseMO.isUndef();
266  //return !MI.hasRegisterImplicitUseOperand(UseMO.getReg());
267 }
268 
269 void SIFoldOperands::foldOperand(
270  MachineOperand &OpToFold,
271  MachineInstr *UseMI,
272  unsigned UseOpIdx,
273  SmallVectorImpl<FoldCandidate> &FoldList,
274  SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
275  const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
276 
277  if (!isUseSafeToFold(*UseMI, UseOp))
278  return;
279 
280  // FIXME: Fold operands with subregs.
281  if (UseOp.isReg() && OpToFold.isReg()) {
282  if (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister)
283  return;
284 
285  // Don't fold subregister extracts into tied operands; only fold a full
286  // copy, since a subregister use tied to a full register def doesn't really
287  // make sense. e.g. don't fold:
288  //
289  // %vreg1 = COPY %vreg0:sub1
290  // %vreg2<tied3> = V_MAC_{F16, F32} %vreg3, %vreg4, %vreg1<tied0>
291  //
292  // into
293  // %vreg2<tied3> = V_MAC_{F16, F32} %vreg3, %vreg4, %vreg0:sub1<tied0>
294  if (UseOp.isTied() && OpToFold.getSubReg() != AMDGPU::NoSubRegister)
295  return;
296  }
297 
298  // Special case for REG_SEQUENCE: We can't fold literals into
299  // REG_SEQUENCE instructions, so we have to fold them into the
300  // uses of REG_SEQUENCE.
301  if (UseMI->isRegSequence()) {
302  unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
303  unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
304 
305  for (MachineRegisterInfo::use_iterator
306  RSUse = MRI->use_begin(RegSeqDstReg), RSE = MRI->use_end();
307  RSUse != RSE; ++RSUse) {
308 
309  MachineInstr *RSUseMI = RSUse->getParent();
310  if (RSUse->getSubReg() != RegSeqDstSubReg)
311  continue;
312 
313  foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
314  CopiesToReplace);
315  }
316 
317  return;
318  }
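// Illustrative example (editorial addition, not part of the original source):
// given
//   %vreg1 = S_MOV_B32 42
//   %vreg2 = REG_SEQUENCE %vreg1, sub0, %vreg3, sub1
// the literal cannot be folded into the REG_SEQUENCE itself, so foldOperand
// recurses into each use of %vreg2 that reads sub0 and attempts the fold there.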
319 
320 
321  bool FoldingImm = OpToFold.isImm();
322 
323  // In order to fold immediates into copies, we need to change the
324  // copy to a MOV.
325  if (FoldingImm && UseMI->isCopy()) {
326  unsigned DestReg = UseMI->getOperand(0).getReg();
327  const TargetRegisterClass *DestRC
328  = TargetRegisterInfo::isVirtualRegister(DestReg) ?
329  MRI->getRegClass(DestReg) :
330  TRI->getPhysRegClass(DestReg);
331 
332  unsigned MovOp = TII->getMovOpcode(DestRC);
333  if (MovOp == AMDGPU::COPY)
334  return;
335 
336  UseMI->setDesc(TII->get(MovOp));
337  CopiesToReplace.push_back(UseMI);
338  } else {
339  const MCInstrDesc &UseDesc = UseMI->getDesc();
340 
341  // Don't fold into target independent nodes. Target independent opcodes
342  // don't have defined register classes.
343  if (UseDesc.isVariadic() ||
344  UseDesc.OpInfo[UseOpIdx].RegClass == -1)
345  return;
346  }
347 
348  if (!FoldingImm) {
349  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
350 
351  // FIXME: We could try to change the instruction from 64-bit to 32-bit
352  // to enable more folding opportunities. The shrink operands pass
353  // already does this.
354  return;
355  }
356 
357 
358  const MCInstrDesc &FoldDesc = OpToFold.getParent()->getDesc();
359  const TargetRegisterClass *FoldRC =
360  TRI->getRegClass(FoldDesc.OpInfo[0].RegClass);
361 
362  APInt Imm(TII->operandBitWidth(FoldDesc.OpInfo[1].OperandType),
363  OpToFold.getImm());
364 
365  // Split 64-bit constants into 32-bits for folding.
366  if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) {
367  unsigned UseReg = UseOp.getReg();
368  const TargetRegisterClass *UseRC
369  = TargetRegisterInfo::isVirtualRegister(UseReg) ?
370  MRI->getRegClass(UseReg) :
371  TRI->getPhysRegClass(UseReg);
372 
373  assert(Imm.getBitWidth() == 64);
374 
375  if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64)
376  return;
377 
378  if (UseOp.getSubReg() == AMDGPU::sub0) {
379  Imm = Imm.getLoBits(32);
380  } else {
381  assert(UseOp.getSubReg() == AMDGPU::sub1);
382  Imm = Imm.getHiBits(32);
383  }
384  }
385 
386  MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
387  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
388 }
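// Illustrative example (editorial addition, not part of the original source)
// of the 64-bit split above: for a double-precision constant materialized as
//   %vreg0 = V_MOV_B64_PSEUDO 0x3FF0000000000000   ; 1.0
// a use through %vreg0:sub0 is offered the low 32 bits (0x00000000) and a use
// through %vreg0:sub1 the high 32 bits (0x3FF00000).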
389 
390 static bool evalBinaryInstruction(unsigned Opcode, int32_t &Result,
391  uint32_t LHS, uint32_t RHS) {
392  switch (Opcode) {
393  case AMDGPU::V_AND_B32_e64:
394  case AMDGPU::V_AND_B32_e32:
395  case AMDGPU::S_AND_B32:
396  Result = LHS & RHS;
397  return true;
398  case AMDGPU::V_OR_B32_e64:
399  case AMDGPU::V_OR_B32_e32:
400  case AMDGPU::S_OR_B32:
401  Result = LHS | RHS;
402  return true;
403  case AMDGPU::V_XOR_B32_e64:
404  case AMDGPU::V_XOR_B32_e32:
405  case AMDGPU::S_XOR_B32:
406  Result = LHS ^ RHS;
407  return true;
408  case AMDGPU::V_LSHL_B32_e64:
409  case AMDGPU::V_LSHL_B32_e32:
410  case AMDGPU::S_LSHL_B32:
411  // The instruction ignores the high bits for out of bounds shifts.
412  Result = LHS << (RHS & 31);
413  return true;
414  case AMDGPU::V_LSHLREV_B32_e64:
415  case AMDGPU::V_LSHLREV_B32_e32:
416  Result = RHS << (LHS & 31);
417  return true;
418  case AMDGPU::V_LSHR_B32_e64:
419  case AMDGPU::V_LSHR_B32_e32:
420  case AMDGPU::S_LSHR_B32:
421  Result = LHS >> (RHS & 31);
422  return true;
423  case AMDGPU::V_LSHRREV_B32_e64:
424  case AMDGPU::V_LSHRREV_B32_e32:
425  Result = RHS >> (LHS & 31);
426  return true;
427  case AMDGPU::V_ASHR_I32_e64:
428  case AMDGPU::V_ASHR_I32_e32:
429  case AMDGPU::S_ASHR_I32:
430  Result = static_cast<int32_t>(LHS) >> (RHS & 31);
431  return true;
432  case AMDGPU::V_ASHRREV_I32_e64:
433  case AMDGPU::V_ASHRREV_I32_e32:
434  Result = static_cast<int32_t>(RHS) >> (LHS & 31);
435  return true;
436  default:
437  return false;
438  }
439 }
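// Worked example (editorial addition, not part of the original source): for
// S_LSHL_B32 with LHS = 1 and RHS = 33, the shift amount is masked to
// 33 & 31 = 1, so Result = 1 << 1 = 2, matching the out-of-bounds shift
// behavior described above.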
440 
441 static unsigned getMovOpc(bool IsScalar) {
442  return IsScalar ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
443 }
444 
445 /// Remove any leftover implicit operands from mutating the instruction. e.g.
446 /// if we replace an s_and_b32 with a copy, we don't need the implicit scc def
447 /// anymore.
448 static void stripExtraCopyOperands(MachineInstr &MI) {
449  const MCInstrDesc &Desc = MI.getDesc();
450  unsigned NumOps = Desc.getNumOperands() +
451  Desc.getNumImplicitUses() +
452  Desc.getNumImplicitDefs();
453 
454  for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
455  MI.RemoveOperand(I);
456 }
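// Illustrative example (editorial addition, not part of the original source):
// after
//   %sgpr2 = S_AND_B32 %sgpr0, -1, %SCC<imp-def>
// is rewritten to a COPY, the trailing implicit SCC def is not part of COPY's
// descriptor and is removed by the loop above.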
457 
458 static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) {
459  MI.setDesc(NewDesc);
460  stripExtraCopyOperands(MI);
461 }
462 
463 static MachineOperand *getImmOrMaterializedImm(MachineRegisterInfo &MRI,
464  MachineOperand &Op) {
465  if (Op.isReg()) {
466  // If this has a subregister, it obviously is a register source.
467  if (Op.getSubReg() != AMDGPU::NoSubRegister)
468  return &Op;
469 
470  MachineInstr *Def = MRI.getVRegDef(Op.getReg());
471  if (Def->isMoveImmediate()) {
472  MachineOperand &ImmSrc = Def->getOperand(1);
473  if (ImmSrc.isImm())
474  return &ImmSrc;
475  }
476  }
477 
478  return &Op;
479 }
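// Illustrative example (editorial addition, not part of the original source):
// for
//   %vreg1 = V_MOV_B32_e32 64
//   %vreg2 = V_OR_B32_e32 %vreg1, %vreg0
// looking up the V_OR's src0 through this helper yields the immediate operand
// of the V_MOV, so the constant-folding code below can treat %vreg1 as the
// literal 64.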
480 
481 // Try to simplify operations with a constant that may appear after instruction
482 // selection.
483 // TODO: See if a frame index with a fixed offset can fold.
484 static bool tryConstantFoldOp(MachineRegisterInfo &MRI,
485  const SIInstrInfo *TII,
486  MachineInstr *MI,
487  MachineOperand *ImmOp) {
488  unsigned Opc = MI->getOpcode();
489  if (Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
490  Opc == AMDGPU::S_NOT_B32) {
491  MI->getOperand(1).ChangeToImmediate(~ImmOp->getImm());
492  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_NOT_B32)));
493  return true;
494  }
495 
496  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
497  if (Src1Idx == -1)
498  return false;
499 
500  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
501  MachineOperand *Src0 = getImmOrMaterializedImm(MRI, MI->getOperand(Src0Idx));
502  MachineOperand *Src1 = getImmOrMaterializedImm(MRI, MI->getOperand(Src1Idx));
503 
504  if (!Src0->isImm() && !Src1->isImm())
505  return false;
506 
507  // and k0, k1 -> v_mov_b32 (k0 & k1)
508  // or k0, k1 -> v_mov_b32 (k0 | k1)
509  // xor k0, k1 -> v_mov_b32 (k0 ^ k1)
510  if (Src0->isImm() && Src1->isImm()) {
511  int32_t NewImm;
512  if (!evalBinaryInstruction(Opc, NewImm, Src0->getImm(), Src1->getImm()))
513  return false;
514 
515  const SIRegisterInfo &TRI = TII->getRegisterInfo();
516  bool IsSGPR = TRI.isSGPRReg(MRI, MI->getOperand(0).getReg());
517 
518  // Be careful to change the right operand, src0 may belong to a different
519  // instruction.
520  MI->getOperand(Src0Idx).ChangeToImmediate(NewImm);
521  MI->RemoveOperand(Src1Idx);
522  mutateCopyOp(*MI, TII->get(getMovOpc(IsSGPR)));
523  return true;
524  }
525 
526  if (!MI->isCommutable())
527  return false;
528 
529  if (Src0->isImm() && !Src1->isImm()) {
530  std::swap(Src0, Src1);
531  std::swap(Src0Idx, Src1Idx);
532  }
533 
534  int32_t Src1Val = static_cast<int32_t>(Src1->getImm());
535  if (Opc == AMDGPU::V_OR_B32_e64 ||
536  Opc == AMDGPU::V_OR_B32_e32 ||
537  Opc == AMDGPU::S_OR_B32) {
538  if (Src1Val == 0) {
539  // y = or x, 0 => y = copy x
540  MI->RemoveOperand(Src1Idx);
541  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
542  } else if (Src1Val == -1) {
543  // y = or x, -1 => y = v_mov_b32 -1
544  MI->RemoveOperand(Src1Idx);
545  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_OR_B32)));
546  } else
547  return false;
548 
549  return true;
550  }
551 
552  if (MI->getOpcode() == AMDGPU::V_AND_B32_e64 ||
553  MI->getOpcode() == AMDGPU::V_AND_B32_e32 ||
554  MI->getOpcode() == AMDGPU::S_AND_B32) {
555  if (Src1Val == 0) {
556  // y = and x, 0 => y = v_mov_b32 0
557  MI->RemoveOperand(Src0Idx);
558  mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_AND_B32)));
559  } else if (Src1Val == -1) {
560  // y = and x, -1 => y = copy x
561  MI->RemoveOperand(Src1Idx);
562  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
563  stripExtraCopyOperands(*MI);
564  } else
565  return false;
566 
567  return true;
568  }
569 
570  if (MI->getOpcode() == AMDGPU::V_XOR_B32_e64 ||
571  MI->getOpcode() == AMDGPU::V_XOR_B32_e32 ||
572  MI->getOpcode() == AMDGPU::S_XOR_B32) {
573  if (Src1Val == 0) {
574  // y = xor x, 0 => y = copy x
575  MI->RemoveOperand(Src1Idx);
576  mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
577  return true;
578  }
579  }
580 
581  return false;
582 }
583 
584 void SIFoldOperands::foldInstOperand(MachineInstr &MI,
585  MachineOperand &OpToFold) const {
586  // We need to mutate the operands of new mov instructions to add implicit
587  // uses of EXEC, but adding them invalidates the use_iterator, so defer
588  // this.
589  SmallVector<MachineInstr *, 4> CopiesToReplace;
590  SmallVector<FoldCandidate, 4> FoldList;
591  MachineOperand &Dst = MI.getOperand(0);
592 
593  bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
594  if (FoldingImm) {
595  unsigned NumLiteralUses = 0;
596  MachineOperand *NonInlineUse = nullptr;
597  int NonInlineUseOpNo = -1;
598 
599  MachineRegisterInfo::use_iterator NextUse, NextInstUse;
600  for (MachineRegisterInfo::use_iterator
601  Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
602  Use != E; Use = NextUse) {
603  NextUse = std::next(Use);
604  MachineInstr *UseMI = Use->getParent();
605  unsigned OpNo = Use.getOperandNo();
606 
607  // Folding the immediate may reveal operations that can be constant
608  // folded or replaced with a copy. This can happen for example after
609  // frame indices are lowered to constants or from splitting 64-bit
610  // constants.
611  //
612  // We may also encounter cases where one or both operands are
613  // immediates materialized into a register, which would ordinarily not
614  // be folded due to multiple uses or operand constraints.
615 
616  if (OpToFold.isImm() && tryConstantFoldOp(*MRI, TII, UseMI, &OpToFold)) {
617  DEBUG(dbgs() << "Constant folded " << *UseMI <<'\n');
618 
619  // Some constant folding cases change the same immediate's use to a new
620  // instruction, e.g. and x, 0 -> 0. Make sure we re-visit the user
621  // again. The same constant folded instruction could also have a second
622  // use operand.
623  NextUse = MRI->use_begin(Dst.getReg());
624  continue;
625  }
626 
627  // Try to fold any inline immediate uses, and then only fold other
628  // constants if they have one use.
629  //
630  // The legality of the inline immediate must be checked based on the use
631  // operand, not the defining instruction, because 32-bit instructions
632  // with 32-bit inline immediate sources may be used to materialize
633  // constants used in 16-bit operands.
634  //
635  // e.g. it is unsafe to fold:
636  // s_mov_b32 s0, 1.0 // materializes 0x3f800000
637  // v_add_f16 v0, v1, s0 // 1.0 f16 inline immediate sees 0x00003c00
638 
639  // Folding immediates with more than one use will increase program size.
640  // FIXME: This will also reduce register usage, which may be better
641  // in some cases. A better heuristic is needed.
642  if (isInlineConstantIfFolded(TII, *UseMI, OpNo, OpToFold)) {
643  foldOperand(OpToFold, UseMI, OpNo, FoldList, CopiesToReplace);
644  } else {
645  if (++NumLiteralUses == 1) {
646  NonInlineUse = &*Use;
647  NonInlineUseOpNo = OpNo;
648  }
649  }
650  }
651 
652  if (NumLiteralUses == 1) {
653  MachineInstr *UseMI = NonInlineUse->getParent();
654  foldOperand(OpToFold, UseMI, NonInlineUseOpNo, FoldList, CopiesToReplace);
655  }
656  } else {
657  // Folding register.
658  for (MachineRegisterInfo::use_iterator
659  Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
660  Use != E; ++Use) {
661  MachineInstr *UseMI = Use->getParent();
662 
663  foldOperand(OpToFold, UseMI, Use.getOperandNo(),
664  FoldList, CopiesToReplace);
665  }
666  }
667 
668  MachineFunction *MF = MI.getParent()->getParent();
669  // Make sure we add EXEC uses to any new v_mov instructions created.
670  for (MachineInstr *Copy : CopiesToReplace)
671  Copy->addImplicitDefUseOperands(*MF);
672 
673  for (FoldCandidate &Fold : FoldList) {
674  if (updateOperand(Fold, *TRI)) {
675  // Clear kill flags.
676  if (Fold.isReg()) {
677  assert(Fold.OpToFold && Fold.OpToFold->isReg());
678  // FIXME: Probably shouldn't bother trying to fold if not an
679  // SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR
680  // copies.
681  MRI->clearKillFlags(Fold.OpToFold->getReg());
682  }
683  DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
684  static_cast<int>(Fold.UseOpNo) << " of " << *Fold.UseMI << '\n');
685  }
686  }
687 }
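// Illustrative example (editorial addition, not part of the original source):
// an inline immediate such as 1.0 (encoded for free) is folded into every
// legal use, while a literal such as 0x12345678 costs an extra dword per
// instruction and is therefore only folded when exactly one non-inline use
// remains.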
688 
689 bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
690  if (skipFunction(*MF.getFunction()))
691  return false;
692 
693  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
694 
695  MRI = &MF.getRegInfo();
696  TII = ST.getInstrInfo();
697  TRI = &TII->getRegisterInfo();
698 
699  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
700  BI != BE; ++BI) {
701 
702  MachineBasicBlock &MBB = *BI;
703  MachineBasicBlock::iterator I, Next;
704  for (I = MBB.begin(); I != MBB.end(); I = Next) {
705  Next = std::next(I);
706  MachineInstr &MI = *I;
707 
708  if (!isSafeToFold(MI))
709  continue;
710 
711  MachineOperand &OpToFold = MI.getOperand(1);
712  bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
713 
714  // FIXME: We could also be folding things like TargetIndexes.
715  if (!FoldingImm && !OpToFold.isReg())
716  continue;
717 
718  if (OpToFold.isReg() &&
719  !TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()))
720  continue;
721 
722  // Prevent folding operands backwards in the function. For example,
723  // the COPY opcode must not be replaced by 1 in this example:
724  //
725  // %vreg3<def> = COPY %VGPR0; VGPR_32:%vreg3
726  // ...
727  // %VGPR0<def> = V_MOV_B32_e32 1, %EXEC<imp-use>
728  MachineOperand &Dst = MI.getOperand(0);
729  if (Dst.isReg() &&
730  !TargetRegisterInfo::isVirtualRegister(Dst.getReg()))
731  continue;
732 
733  foldInstOperand(MI, OpToFold);
734  }
735  }
736  return false;
737 }