LLVM  3.7.0
SIShrinkInstructions.cpp
Go to the documentation of this file.
1 //===-- SIShrinkInstructions.cpp - Shrink Instructions --------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 /// The pass tries to use the 32-bit encoding for instructions when possible.
9 //===----------------------------------------------------------------------===//
10 //
11 
12 #include "AMDGPU.h"
13 #include "AMDGPUMCInstLower.h"
14 #include "AMDGPUSubtarget.h"
15 #include "SIInstrInfo.h"
16 #include "llvm/ADT/Statistic.h"
20 #include "llvm/IR/Constants.h"
21 #include "llvm/IR/Function.h"
22 #include "llvm/IR/LLVMContext.h"
23 #include "llvm/Support/Debug.h"
26 
27 #define DEBUG_TYPE "si-shrink-instructions"
28 
// Incremented in runOnMachineFunction each time an instruction is replaced by
// its 32-bit encoding.
29 STATISTIC(NumInstructionsShrunk,
30  "Number of 64-bit instruction reduced to 32-bit.");
// Incremented in foldImmediates each time a move-immediate's value is folded
// into src0 of an instruction.
31 STATISTIC(NumLiteralConstantsFolded,
32  "Number of literal constants folded into 32-bit instructions.");
33 
34 namespace llvm {
36 }
37 
38 using namespace llvm;
39 
40 namespace {
41 
42 class SIShrinkInstructions : public MachineFunctionPass {
43 public:
44  static char ID;
45 
46 public:
47  SIShrinkInstructions() : MachineFunctionPass(ID) {
48  }
49 
50  bool runOnMachineFunction(MachineFunction &MF) override;
51 
52  const char *getPassName() const override {
53  return "SI Shrink Instructions";
54  }
55 
56  void getAnalysisUsage(AnalysisUsage &AU) const override {
57  AU.setPreservesCFG();
59  }
60 };
61 
62 } // End anonymous namespace.
63 
// Register the pass. The description string matches what getPassName()
// returns for this pass ("SI Shrink Instructions"); the previous text,
// "SI Lower il Copies", described a different pass.
64 INITIALIZE_PASS_BEGIN(SIShrinkInstructions, DEBUG_TYPE,
65  "SI Shrink Instructions", false, false)
66 INITIALIZE_PASS_END(SIShrinkInstructions, DEBUG_TYPE,
67  "SI Shrink Instructions", false, false)
68 
69 char SIShrinkInstructions::ID = 0;
70 
72  return new SIShrinkInstructions();
73 }
74 
75 static bool isVGPR(const MachineOperand *MO, const SIRegisterInfo &TRI,
76  const MachineRegisterInfo &MRI) {
77  if (!MO->isReg())
78  return false;
79 
81  return TRI.hasVGPRs(MRI.getRegClass(MO->getReg()));
82 
83  return TRI.hasVGPRs(TRI.getPhysRegClass(MO->getReg()));
84 }
85 
/// \brief Check whether \p MI meets the operand and modifier restrictions this
/// pass requires before rewriting an instruction to its 32-bit encoding.
///
/// Returns false if any of the following hold:
///  - src2 is present and the opcode is neither V_MAC_F32_e64 nor
///    V_CNDMASK_B32_e64, or it is V_MAC_F32_e64 and src2 is not a VGPR or has
///    src2 modifiers set;
///  - src1 is present and is not a VGPR, or its src1_modifiers immediate is
///    non-zero;
///  - src0 modifiers, omod, or clamp are set.
/// Otherwise returns true.
86 static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
87  const SIRegisterInfo &TRI,
88  const MachineRegisterInfo &MRI) {
89 
90  const MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
91  // Can't shrink instructions with three operands, apart from the opcodes
92  // the switch below lets through.
93  // FIXME: v_cndmask_b32 has 3 operands and is shrinkable, but it can only be
94  // shrunk if the third operand is vcc. We should handle this the same way we
95  // handle vopc, by adding a register allocation hint pre-regalloc and then
96  // doing the shrinking post-regalloc.
// NOTE(review): the FIXME above looks stale -- V_CNDMASK_B32_e64 is accepted
// below and runOnMachineFunction sets a VCC hint for its src2; confirm.
97  if (Src2) {
98  switch (MI.getOpcode()) {
99  default: return false;
100 
101  case AMDGPU::V_MAC_F32_e64:
// src2 must be a VGPR without modifiers for this opcode to qualify.
102  if (!isVGPR(Src2, TRI, MRI) ||
103  TII->hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers))
104  return false;
105  break;
106 
107  case AMDGPU::V_CNDMASK_B32_e64:
// No src2 check here; runOnMachineFunction inspects src2 for this opcode.
108  break;
109  }
110  }
111 
112  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
113  const MachineOperand *Src1Mod =
114  TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
115 
// src1, when present, must be a VGPR with no modifier bits set.
116  if (Src1 && (!isVGPR(Src1, TRI, MRI) || (Src1Mod && Src1Mod->getImm() != 0)))
117  return false;
118 
119  // We don't need to check src0, all input types are legal, so just make sure
120  // src0 isn't using any modifiers.
121  if (TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers))
122  return false;
123 
124  // Check output modifiers (omod and clamp); either one blocks shrinking.
125  if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
126  return false;
127 
128  if (TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))
129  return false;
130 
131  return true;
132 }
133 
134 /// \brief This function checks \p MI for operands defined by a move immediate
135 /// instruction and then folds the literal constant into the instruction if it
136 /// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instruction
137 /// and will only fold literal constants if we are still in SSA.
139  MachineRegisterInfo &MRI, bool TryToCommute = true) {
140 
141  if (!MRI.isSSA())
142  return;
143 
144  assert(TII->isVOP1(MI.getOpcode()) || TII->isVOP2(MI.getOpcode()) ||
145  TII->isVOPC(MI.getOpcode()));
146 
147  const SIRegisterInfo &TRI = TII->getRegisterInfo();
148  int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
149  MachineOperand &Src0 = MI.getOperand(Src0Idx);
150 
151  // Only one literal constant is allowed per instruction, so if src0 is a
152  // literal constant then we can't do any folding.
153  if (Src0.isImm() &&
154  TII->isLiteralConstant(Src0, TII->getOpSize(MI, Src0Idx)))
155  return;
156 
157  // Literal constants and SGPRs can only be used in Src0, so if Src0 is an
158  // SGPR, we cannot commute the instruction, so we can't fold any literal
159  // constants.
160  if (Src0.isReg() && !isVGPR(&Src0, TRI, MRI))
161  return;
162 
163  // Try to fold Src0
164  if (Src0.isReg() && MRI.hasOneUse(Src0.getReg())) {
165  unsigned Reg = Src0.getReg();
166  MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
167  if (Def && Def->isMoveImmediate()) {
168  MachineOperand &MovSrc = Def->getOperand(1);
169  bool ConstantFolded = false;
170 
171  if (MovSrc.isImm() && isUInt<32>(MovSrc.getImm())) {
172  Src0.ChangeToImmediate(MovSrc.getImm());
173  ConstantFolded = true;
174  }
175  if (ConstantFolded) {
176  if (MRI.use_empty(Reg))
177  Def->eraseFromParent();
178  ++NumLiteralConstantsFolded;
179  return;
180  }
181  }
182  }
183 
184  // We have failed to fold src0, so commute the instruction and try again.
185  if (TryToCommute && MI.isCommutable() && TII->commuteInstruction(&MI))
186  foldImmediates(MI, TII, MRI, false);
187 
188 }
189 
190 bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
191  MachineRegisterInfo &MRI = MF.getRegInfo();
192  const SIInstrInfo *TII =
193  static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
194  const SIRegisterInfo &TRI = TII->getRegisterInfo();
195  std::vector<unsigned> I1Defs;
196 
197  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
198  BI != BE; ++BI) {
199 
200  MachineBasicBlock &MBB = *BI;
202  for (I = MBB.begin(); I != MBB.end(); I = Next) {
203  Next = std::next(I);
204  MachineInstr &MI = *I;
205 
206  // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
207  if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
208  const MachineOperand &Src = MI.getOperand(1);
209 
210  if (Src.isImm()) {
211  if (isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4))
212  MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
213  }
214 
215  continue;
216  }
217 
218  if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
219  continue;
220 
221  if (!canShrink(MI, TII, TRI, MRI)) {
222  // Try commuting the instruction and see if that enables us to shrink
223  // it.
224  if (!MI.isCommutable() || !TII->commuteInstruction(&MI) ||
225  !canShrink(MI, TII, TRI, MRI))
226  continue;
227  }
228 
229  // getVOPe32 could be -1 here if we started with an instruction that had
230  // a 32-bit encoding and then commuted it to an instruction that did not.
231  if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
232  continue;
233 
234  int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
235 
236  if (TII->isVOPC(Op32)) {
237  unsigned DstReg = MI.getOperand(0).getReg();
239  // VOPC instructions can only write to the VCC register. We can't
240  // force them to use VCC here, because the register allocator has
241  // trouble with sequences like this, which cause the allocator to run
242  // out of registers if vreg0 and vreg1 belong to the VCCReg register
243  // class:
244  // vreg0 = VOPC;
245  // vreg1 = VOPC;
246  // S_AND_B64 vreg0, vreg1
247  //
248  // So, instead of forcing the instruction to write to VCC, we provide
249  // a hint to the register allocator to use VCC and then we will run
250  // this pass again after RA and shrink it if it outputs to VCC.
251  MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, AMDGPU::VCC);
252  continue;
253  }
254  if (DstReg != AMDGPU::VCC)
255  continue;
256  }
257 
258  if (Op32 == AMDGPU::V_CNDMASK_B32_e32) {
259  // We shrink V_CNDMASK_B32_e64 using regalloc hints like we do for VOPC
260  // instructions.
261  const MachineOperand *Src2 =
262  TII->getNamedOperand(MI, AMDGPU::OpName::src2);
263  if (!Src2->isReg())
264  continue;
265  unsigned SReg = Src2->getReg();
267  MRI.setRegAllocationHint(SReg, 0, AMDGPU::VCC);
268  continue;
269  }
270  if (SReg != AMDGPU::VCC)
271  continue;
272  }
273 
274  // We can shrink this instruction
275  DEBUG(dbgs() << "Shrinking "; MI.dump(); dbgs() << '\n';);
276 
277  MachineInstrBuilder Inst32 =
278  BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32));
279 
280  // dst
281  Inst32.addOperand(MI.getOperand(0));
282 
283  Inst32.addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
284 
285  const MachineOperand *Src1 =
286  TII->getNamedOperand(MI, AMDGPU::OpName::src1);
287  if (Src1)
288  Inst32.addOperand(*Src1);
289 
290  const MachineOperand *Src2 =
291  TII->getNamedOperand(MI, AMDGPU::OpName::src2);
292  if (Src2)
293  Inst32.addOperand(*Src2);
294 
295  ++NumInstructionsShrunk;
296  MI.eraseFromParent();
297 
298  foldImmediates(*Inst32, TII, MRI);
299  DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
300 
301 
302  }
303  }
304  return false;
305 }
AMDGPU specific subclass of TargetSubtarget.
static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII, const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI)
FunctionPass * createSIShrinkInstructionsPass()
const SIRegisterInfo & getRegisterInfo() const override
Definition: SIInstrInfo.h:71
static bool isVirtualRegister(unsigned Reg)
isVirtualRegister - Return true if the specified register number is in the virtual register namespace...
bool hasVGPRs(const TargetRegisterClass *RC) const
MachineInstr * commuteInstruction(MachineInstr *MI, bool NewMI=false) const override
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:75
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
const TargetRegisterClass * getRegClass(unsigned Reg) const
getRegClass - Return the register class of the specified virtual register.
Reg
All possible values of the reg field in the ModR/M byte.
static bool isVGPR(const MachineOperand *MO, const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI)
#define DEBUG_TYPE
The pass tries to use the 32-bit encoding for instructions when possible.
int64_t getImm() const
INITIALIZE_PASS_BEGIN(SIShrinkInstructions, DEBUG_TYPE,"SI Lower il Copies", false, false) INITIALIZE_PASS_END(SIShrinkInstructions
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:267
void ChangeToImmediate(int64_t ImmVal)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value...
bundle_iterator< MachineInstr, instr_iterator > iterator
STATISTIC(NumInstructionsShrunk,"Number of 64-bit instruction reduced to 32-bit.")
unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given instruction opcode.
Definition: SIInstrInfo.h:267
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:273
Represent the analysis usage information of a pass.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:294
MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, const MCInstrDesc &MCID)
BuildMI - Builder interface.
void initializeSIShrinkInstructionsPass(PassRegistry &)
bool isLiteralConstant(const MachineOperand &MO, unsigned OpSize) const
int getVOPe32(uint16_t Opcode)
bool isVOP1(uint16_t Opcode) const
Definition: SIInstrInfo.h:179
bool isInlineConstant(const APInt &Imm) const
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
MachineOperand class - Representation of each machine instruction operand.
void setRegAllocationHint(unsigned VReg, unsigned Type, unsigned PrefReg)
setRegAllocationHint - Specify a register allocation hint for the specified virtual register...
bool hasOneUse(unsigned RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:263
bool isVOP2(uint16_t Opcode) const
Definition: SIInstrInfo.h:183
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:123
void dump() const
bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:302
SI Lower il false
MachineInstr * getUniqueVRegDef(unsigned Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:238
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
const TargetRegisterClass * getPhysRegClass(unsigned Reg) const
Return the 'base' register class for this register.
SI Lower il Copies
Representation of each machine instruction.
Definition: MachineInstr.h:51
Interface definition for SIInstrInfo.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
#define I(x, y, z)
Definition: MD5.cpp:54
bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:272
unsigned getReg() const
getReg - Returns the register number.
bool isCommutable(QueryType Type=IgnoreBundle) const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z, ..."), which produces the same result if Y and Z are exchanged.
Definition: MachineInstr.h:607
virtual const TargetInstrInfo * getInstrInfo() const
bool hasModifiersSet(const MachineInstr &MI, unsigned OpName) const
const MachineInstrBuilder & addOperand(const MachineOperand &MO) const
BasicBlockListType::iterator iterator
bool isVOPC(uint16_t Opcode) const
Definition: SIInstrInfo.h:191
#define DEBUG(X)
Definition: Debug.h:92
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex)
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
Definition: PassRegistry.h:41
static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII, MachineRegisterInfo &MRI, bool TryToCommute=true)
This function checks MI for operands defined by a move immediate instruction and then folds the liter...
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction...
Definition: MachineInstr.h:470
bool use_empty(unsigned RegNo) const
use_empty - Return true if there are no instructions using the specified register.