LLVM 3.7.0
SIFoldOperands.cpp
//===-- SIFoldOperands.cpp - Fold operands --- ----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
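/// \brief Replaces uses of registers that are defined by simple immediate
/// moves and full-register copies with the immediate or source register
/// itself, whenever the using instruction can legally encode that operand.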
//===----------------------------------------------------------------------===//
//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "si-fold-operands"
using namespace llvm;

namespace {

class SIFoldOperands : public MachineFunctionPass {
public:
  static char ID;

public:
  SIFoldOperands() : MachineFunctionPass(ID) {
    initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  const char *getPassName() const override {
    return "SI Fold Operands";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineDominatorTree>();
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

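// A single pending fold: either an immediate or a register operand that will
// be substituted into operand UseOpNo of UseMI once all candidates for the
// defining instruction have been collected.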
struct FoldCandidate {
  MachineInstr *UseMI;
  unsigned UseOpNo;
  MachineOperand *OpToFold;
  uint64_t ImmToFold;

  FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
                UseMI(MI), UseOpNo(OpNo) {

    if (FoldOp->isImm()) {
      OpToFold = nullptr;
      ImmToFold = FoldOp->getImm();
    } else {
      assert(FoldOp->isReg());
      OpToFold = FoldOp;
    }
  }

  bool isImm() const {
    return !OpToFold;
  }
};

} // End anonymous namespace.

INITIALIZE_PASS_BEGIN(SIFoldOperands, DEBUG_TYPE,
                      "SI Fold Operands", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(SIFoldOperands, DEBUG_TYPE,
                    "SI Fold Operands", false, false)

char SIFoldOperands::ID = 0;

char &llvm::SIFoldOperandsID = SIFoldOperands::ID;

FunctionPass *llvm::createSIFoldOperandsPass() {
  return new SIFoldOperands();
}

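// Only sources of simple full-register immediate moves and plain copies are
// considered for folding; anything else is left alone.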
static bool isSafeToFold(unsigned Opcode) {
  switch(Opcode) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::COPY:
    return true;
  default:
    return false;
  }
}

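// Apply a recorded fold: rewrite the use operand to the immediate, or
// substitute the source virtual register. Physical registers are not handled
// yet. Returns true if the operand was actually changed.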
static bool updateOperand(FoldCandidate &Fold,
                          const TargetRegisterInfo &TRI) {
  MachineInstr *MI = Fold.UseMI;
  MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
  assert(Old.isReg());

  if (Fold.isImm()) {
    Old.ChangeToImmediate(Fold.ImmToFold);
    return true;
  }

  MachineOperand *New = Fold.OpToFold;
  if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
      TargetRegisterInfo::isVirtualRegister(New->getReg())) {
    Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);
    return true;
  }

  // FIXME: Handle physical registers.

  return false;
}

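// Return true if a fold into some operand of MI has already been queued.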
static bool isUseMIInFoldList(const std::vector<FoldCandidate> &FoldList,
                              const MachineInstr *MI) {
  for (auto Candidate : FoldList) {
    if (Candidate.UseMI == MI)
      return true;
  }
  return false;
}

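// Try to queue a fold of OpToFold into operand OpNo of MI. If the operand is
// not legal in that position, first try rewriting v_mac_f32 to v_mad_f32, and
// then try commuting MI, before giving up.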
static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
                             MachineInstr *MI, unsigned OpNo,
                             MachineOperand *OpToFold,
                             const SIInstrInfo *TII) {
  if (!TII->isOperandLegal(MI, OpNo, OpToFold)) {

    // Special case for v_mac_f32_e64 if we are trying to fold into src2
    unsigned Opc = MI->getOpcode();
    if (Opc == AMDGPU::V_MAC_F32_e64 &&
        (int)OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)) {
      // Check if changing this to a v_mad_f32 instruction will allow us to
      // fold the operand.
      MI->setDesc(TII->get(AMDGPU::V_MAD_F32));
      bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold, TII);
      if (FoldAsMAD) {
        MI->untieRegOperand(OpNo);
        return true;
      }
      MI->setDesc(TII->get(Opc));
    }

    // If we are already folding into another operand of MI, then
    // we can't commute the instruction, otherwise we risk making the
    // other fold illegal.
    if (isUseMIInFoldList(FoldList, MI))
      return false;

    // Operand is not legal, so try to commute the instruction to
    // see if this makes it possible to fold.
    unsigned CommuteIdx0;
    unsigned CommuteIdx1;
    bool CanCommute = TII->findCommutedOpIndices(MI, CommuteIdx0, CommuteIdx1);

    if (CanCommute) {
      if (CommuteIdx0 == OpNo)
        OpNo = CommuteIdx1;
      else if (CommuteIdx1 == OpNo)
        OpNo = CommuteIdx0;
    }

    if (!CanCommute || !TII->commuteInstruction(MI))
      return false;

    if (!TII->isOperandLegal(MI, OpNo, OpToFold))
      return false;
  }

  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
  return true;
}

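// For every foldable mov/copy, collect all legal folds of its source operand
// into the users of its destination register, then apply them in one batch.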
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
                                                  BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      if (!isSafeToFold(MI.getOpcode()))
        continue;

      unsigned OpSize = TII->getOpSize(MI, 1);
      MachineOperand &OpToFold = MI.getOperand(1);
      bool FoldingImm = OpToFold.isImm();

      // FIXME: We could also be folding things like FrameIndexes and
      // TargetIndexes.
      if (!FoldingImm && !OpToFold.isReg())
        continue;

      // Folding immediates with more than one use will increase program size.
      // FIXME: This will also reduce register usage, which may be better
      // in some cases. A better heuristic is needed.
      if (FoldingImm && !TII->isInlineConstant(OpToFold, OpSize) &&
          !MRI.hasOneUse(MI.getOperand(0).getReg()))
        continue;

      // FIXME: Fold operands with subregs.
      if (OpToFold.isReg() &&
          (!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()) ||
           OpToFold.getSubReg()))
        continue;

      std::vector<FoldCandidate> FoldList;
      for (MachineRegisterInfo::use_iterator
           Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end();
           Use != E; ++Use) {

        MachineInstr *UseMI = Use->getParent();
        const MachineOperand &UseOp = UseMI->getOperand(Use.getOperandNo());

        // FIXME: Fold operands with subregs.
        if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) ||
            UseOp.isImplicit())) {
          continue;
        }

        APInt Imm;

        if (FoldingImm) {
          unsigned UseReg = UseOp.getReg();
          const TargetRegisterClass *UseRC
            = TargetRegisterInfo::isVirtualRegister(UseReg) ?
            MRI.getRegClass(UseReg) :
            TRI.getPhysRegClass(UseReg);

          Imm = APInt(64, OpToFold.getImm());

          // Split 64-bit constants into 32-bits for folding.
          if (UseOp.getSubReg()) {
            if (UseRC->getSize() != 8)
              continue;

            if (UseOp.getSubReg() == AMDGPU::sub0) {
              Imm = Imm.getLoBits(32);
            } else {
              assert(UseOp.getSubReg() == AMDGPU::sub1);
              Imm = Imm.getHiBits(32);
            }
          }

          // In order to fold immediates into copies, we need to change the
          // copy to a MOV.
          if (UseMI->getOpcode() == AMDGPU::COPY) {
            unsigned DestReg = UseMI->getOperand(0).getReg();
            const TargetRegisterClass *DestRC
              = TargetRegisterInfo::isVirtualRegister(DestReg) ?
              MRI.getRegClass(DestReg) :
              TRI.getPhysRegClass(DestReg);

            unsigned MovOp = TII->getMovOpcode(DestRC);
            if (MovOp == AMDGPU::COPY)
              continue;

            UseMI->setDesc(TII->get(MovOp));
          }
        }

        const MCInstrDesc &UseDesc = UseMI->getDesc();

        // Don't fold into target independent nodes. Target independent opcodes
        // don't have defined register classes.
        if (UseDesc.isVariadic() ||
            UseDesc.OpInfo[Use.getOperandNo()].RegClass == -1)
          continue;

        if (FoldingImm) {
          MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
          tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &ImmOp, TII);
          continue;
        }

        tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &OpToFold, TII);

        // FIXME: We could try to change the instruction from 64-bit to 32-bit
        // to enable more folding opportunities. The shrink operands pass
        // already does this.
      }

      for (FoldCandidate &Fold : FoldList) {
        if (updateOperand(Fold, TRI)) {
          // Clear kill flags.
          if (!Fold.isImm()) {
            assert(Fold.OpToFold && Fold.OpToFold->isReg());
            Fold.OpToFold->setIsKill(false);
          }
          DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
                Fold.UseOpNo << " of " << *Fold.UseMI << '\n');
        }
      }
    }
  }
  return false;
}