LLVM  8.0.0svn
SIShrinkInstructions.cpp
Go to the documentation of this file.
1 //===-- SIShrinkInstructions.cpp - Shrink Instructions --------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 /// The pass tries to use the 32-bit encoding for instructions when possible.
9 //===----------------------------------------------------------------------===//
10 //
11 
12 #include "AMDGPU.h"
13 #include "AMDGPUSubtarget.h"
14 #include "SIInstrInfo.h"
16 #include "llvm/ADT/Statistic.h"
20 #include "llvm/IR/Constants.h"
21 #include "llvm/IR/Function.h"
22 #include "llvm/IR/LLVMContext.h"
23 #include "llvm/Support/Debug.h"
26 
27 #define DEBUG_TYPE "si-shrink-instructions"
28 
29 STATISTIC(NumInstructionsShrunk,
30  "Number of 64-bit instruction reduced to 32-bit.");
31 STATISTIC(NumLiteralConstantsFolded,
32  "Number of literal constants folded into 32-bit instructions.");
33 
34 using namespace llvm;
35 
36 namespace {
37 
38 class SIShrinkInstructions : public MachineFunctionPass {
39 public:
40  static char ID;
41 
42 public:
43  SIShrinkInstructions() : MachineFunctionPass(ID) {
44  }
45 
46  bool runOnMachineFunction(MachineFunction &MF) override;
47 
48  StringRef getPassName() const override { return "SI Shrink Instructions"; }
49 
50  void getAnalysisUsage(AnalysisUsage &AU) const override {
51  AU.setPreservesCFG();
53  }
54 };
55 
56 } // End anonymous namespace.
57 
58 INITIALIZE_PASS(SIShrinkInstructions, DEBUG_TYPE,
59  "SI Shrink Instructions", false, false)
60 
61 char SIShrinkInstructions::ID = 0;
62 
64  return new SIShrinkInstructions();
65 }
66 
67 /// This function checks \p MI for operands defined by a move immediate
68 /// instruction and then folds the literal constant into the instruction if it
69 /// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instructions.
71  MachineRegisterInfo &MRI, bool TryToCommute = true) {
72  assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI));
73 
74  int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
75 
76  // Try to fold Src0
77  MachineOperand &Src0 = MI.getOperand(Src0Idx);
78  if (Src0.isReg()) {
79  unsigned Reg = Src0.getReg();
80  if (TargetRegisterInfo::isVirtualRegister(Reg) && MRI.hasOneUse(Reg)) {
81  MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
82  if (Def && Def->isMoveImmediate()) {
83  MachineOperand &MovSrc = Def->getOperand(1);
84  bool ConstantFolded = false;
85 
86  if (MovSrc.isImm() && (isInt<32>(MovSrc.getImm()) ||
87  isUInt<32>(MovSrc.getImm()))) {
88  // It's possible to have only one component of a super-reg defined by
89  // a single mov, so we need to clear any subregister flag.
90  Src0.setSubReg(0);
91  Src0.ChangeToImmediate(MovSrc.getImm());
92  ConstantFolded = true;
93  } else if (MovSrc.isFI()) {
94  Src0.setSubReg(0);
95  Src0.ChangeToFrameIndex(MovSrc.getIndex());
96  ConstantFolded = true;
97  }
98 
99  if (ConstantFolded) {
100  assert(MRI.use_empty(Reg));
101  Def->eraseFromParent();
102  ++NumLiteralConstantsFolded;
103  return true;
104  }
105  }
106  }
107  }
108 
109  // We have failed to fold src0, so commute the instruction and try again.
110  if (TryToCommute && MI.isCommutable()) {
111  if (TII->commuteInstruction(MI)) {
112  if (foldImmediates(MI, TII, MRI, false))
113  return true;
114 
115  // Commute back.
116  TII->commuteInstruction(MI);
117  }
118  }
119 
120  return false;
121 }
122 
123 static bool isKImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
124  return isInt<16>(Src.getImm()) &&
125  !TII->isInlineConstant(*Src.getParent(),
126  Src.getParent()->getOperandNo(&Src));
127 }
128 
129 static bool isKUImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
130  return isUInt<16>(Src.getImm()) &&
131  !TII->isInlineConstant(*Src.getParent(),
132  Src.getParent()->getOperandNo(&Src));
133 }
134 
136  const MachineOperand &Src,
137  bool &IsUnsigned) {
138  if (isInt<16>(Src.getImm())) {
139  IsUnsigned = false;
140  return !TII->isInlineConstant(Src);
141  }
142 
143  if (isUInt<16>(Src.getImm())) {
144  IsUnsigned = true;
145  return !TII->isInlineConstant(Src);
146  }
147 
148  return false;
149 }
150 
151 /// \returns true if the constant in \p Src should be replaced with a bitreverse
152 /// of an inline immediate.
153 static bool isReverseInlineImm(const SIInstrInfo *TII,
154  const MachineOperand &Src,
155  int32_t &ReverseImm) {
156  if (!isInt<32>(Src.getImm()) || TII->isInlineConstant(Src))
157  return false;
158 
159  ReverseImm = reverseBits<int32_t>(static_cast<int32_t>(Src.getImm()));
160  return ReverseImm >= -16 && ReverseImm <= 64;
161 }
162 
163 /// Copy implicit register operands from specified instruction to this
164 /// instruction that are not part of the instruction definition.
166  const MachineInstr &MI) {
167  for (unsigned i = MI.getDesc().getNumOperands() +
168  MI.getDesc().getNumImplicitUses() +
169  MI.getDesc().getNumImplicitDefs(), e = MI.getNumOperands();
170  i != e; ++i) {
171  const MachineOperand &MO = MI.getOperand(i);
172  if ((MO.isReg() && MO.isImplicit()) || MO.isRegMask())
173  NewMI.addOperand(MF, MO);
174  }
175 }
176 
178  // cmpk instructions do scc = dst <cc op> imm16, so commute the instruction to
179  // get constants on the RHS.
180  if (!MI.getOperand(0).isReg())
181  TII->commuteInstruction(MI, false, 0, 1);
182 
183  const MachineOperand &Src1 = MI.getOperand(1);
184  if (!Src1.isImm())
185  return;
186 
187  int SOPKOpc = AMDGPU::getSOPKOp(MI.getOpcode());
188  if (SOPKOpc == -1)
189  return;
190 
191  // eq/ne is special because the imm16 can be treated as signed or unsigned,
192  // and initially selectd to the unsigned versions.
193  if (SOPKOpc == AMDGPU::S_CMPK_EQ_U32 || SOPKOpc == AMDGPU::S_CMPK_LG_U32) {
194  bool HasUImm;
195  if (isKImmOrKUImmOperand(TII, Src1, HasUImm)) {
196  if (!HasUImm) {
197  SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_U32) ?
198  AMDGPU::S_CMPK_EQ_I32 : AMDGPU::S_CMPK_LG_I32;
199  }
200 
201  MI.setDesc(TII->get(SOPKOpc));
202  }
203 
204  return;
205  }
206 
207  const MCInstrDesc &NewDesc = TII->get(SOPKOpc);
208 
209  if ((TII->sopkIsZext(SOPKOpc) && isKUImmOperand(TII, Src1)) ||
210  (!TII->sopkIsZext(SOPKOpc) && isKImmOperand(TII, Src1))) {
211  MI.setDesc(NewDesc);
212  }
213 }
214 
215 bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
216  if (skipFunction(MF.getFunction()))
217  return false;
218 
220  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
221  const SIInstrInfo *TII = ST.getInstrInfo();
222 
223  std::vector<unsigned> I1Defs;
224 
225  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
226  BI != BE; ++BI) {
227 
228  MachineBasicBlock &MBB = *BI;
230  for (I = MBB.begin(); I != MBB.end(); I = Next) {
231  Next = std::next(I);
232  MachineInstr &MI = *I;
233 
234  if (MI.getOpcode() == AMDGPU::V_MOV_B32_e32) {
235  // If this has a literal constant source that is the same as the
236  // reversed bits of an inline immediate, replace with a bitreverse of
237  // that constant. This saves 4 bytes in the common case of materializing
238  // sign bits.
239 
240  // Test if we are after regalloc. We only want to do this after any
241  // optimizations happen because this will confuse them.
242  // XXX - not exactly a check for post-regalloc run.
243  MachineOperand &Src = MI.getOperand(1);
244  if (Src.isImm() &&
246  int32_t ReverseImm;
247  if (isReverseInlineImm(TII, Src, ReverseImm)) {
248  MI.setDesc(TII->get(AMDGPU::V_BFREV_B32_e32));
249  Src.setImm(ReverseImm);
250  continue;
251  }
252  }
253  }
254 
255  // Combine adjacent s_nops to use the immediate operand encoding how long
256  // to wait.
257  //
258  // s_nop N
259  // s_nop M
260  // =>
261  // s_nop (N + M)
262  if (MI.getOpcode() == AMDGPU::S_NOP &&
263  Next != MBB.end() &&
264  (*Next).getOpcode() == AMDGPU::S_NOP) {
265 
266  MachineInstr &NextMI = *Next;
267  // The instruction encodes the amount to wait with an offset of 1,
268  // i.e. 0 is wait 1 cycle. Convert both to cycles and then convert back
269  // after adding.
270  uint8_t Nop0 = MI.getOperand(0).getImm() + 1;
271  uint8_t Nop1 = NextMI.getOperand(0).getImm() + 1;
272 
273  // Make sure we don't overflow the bounds.
274  if (Nop0 + Nop1 <= 8) {
275  NextMI.getOperand(0).setImm(Nop0 + Nop1 - 1);
276  MI.eraseFromParent();
277  }
278 
279  continue;
280  }
281 
282  // FIXME: We also need to consider movs of constant operands since
283  // immediate operands are not folded if they have more than one use, and
284  // the operand folding pass is unaware if the immediate will be free since
285  // it won't know if the src == dest constraint will end up being
286  // satisfied.
287  if (MI.getOpcode() == AMDGPU::S_ADD_I32 ||
288  MI.getOpcode() == AMDGPU::S_MUL_I32) {
289  const MachineOperand *Dest = &MI.getOperand(0);
290  MachineOperand *Src0 = &MI.getOperand(1);
291  MachineOperand *Src1 = &MI.getOperand(2);
292 
293  if (!Src0->isReg() && Src1->isReg()) {
294  if (TII->commuteInstruction(MI, false, 1, 2))
295  std::swap(Src0, Src1);
296  }
297 
298  // FIXME: This could work better if hints worked with subregisters. If
299  // we have a vector add of a constant, we usually don't get the correct
300  // allocation due to the subregister usage.
302  Src0->isReg()) {
303  MRI.setRegAllocationHint(Dest->getReg(), 0, Src0->getReg());
304  MRI.setRegAllocationHint(Src0->getReg(), 0, Dest->getReg());
305  continue;
306  }
307 
308  if (Src0->isReg() && Src0->getReg() == Dest->getReg()) {
309  if (Src1->isImm() && isKImmOperand(TII, *Src1)) {
310  unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_I32) ?
311  AMDGPU::S_ADDK_I32 : AMDGPU::S_MULK_I32;
312 
313  MI.setDesc(TII->get(Opc));
314  MI.tieOperands(0, 1);
315  }
316  }
317  }
318 
319  // Try to use s_cmpk_*
320  if (MI.isCompare() && TII->isSOPC(MI)) {
321  shrinkScalarCompare(TII, MI);
322  continue;
323  }
324 
325  // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
326  if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
327  const MachineOperand &Dst = MI.getOperand(0);
328  MachineOperand &Src = MI.getOperand(1);
329 
330  if (Src.isImm() &&
332  int32_t ReverseImm;
333  if (isKImmOperand(TII, Src))
334  MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
335  else if (isReverseInlineImm(TII, Src, ReverseImm)) {
336  MI.setDesc(TII->get(AMDGPU::S_BREV_B32));
337  Src.setImm(ReverseImm);
338  }
339  }
340 
341  continue;
342  }
343 
344  if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
345  continue;
346 
347  if (!TII->canShrink(MI, MRI)) {
348  // Try commuting the instruction and see if that enables us to shrink
349  // it.
350  if (!MI.isCommutable() || !TII->commuteInstruction(MI) ||
351  !TII->canShrink(MI, MRI))
352  continue;
353  }
354 
355  // getVOPe32 could be -1 here if we started with an instruction that had
356  // a 32-bit encoding and then commuted it to an instruction that did not.
357  if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
358  continue;
359 
360  int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
361 
362  if (TII->isVOPC(Op32)) {
363  unsigned DstReg = MI.getOperand(0).getReg();
365  // VOPC instructions can only write to the VCC register. We can't
366  // force them to use VCC here, because this is only one register and
367  // cannot deal with sequences which would require multiple copies of
368  // VCC, e.g. S_AND_B64 (vcc = V_CMP_...), (vcc = V_CMP_...)
369  //
370  // So, instead of forcing the instruction to write to VCC, we provide
371  // a hint to the register allocator to use VCC and then we will run
372  // this pass again after RA and shrink it if it outputs to VCC.
373  MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, AMDGPU::VCC);
374  continue;
375  }
376  if (DstReg != AMDGPU::VCC)
377  continue;
378  }
379 
380  if (Op32 == AMDGPU::V_CNDMASK_B32_e32) {
381  // We shrink V_CNDMASK_B32_e64 using regalloc hints like we do for VOPC
382  // instructions.
383  const MachineOperand *Src2 =
384  TII->getNamedOperand(MI, AMDGPU::OpName::src2);
385  if (!Src2->isReg())
386  continue;
387  unsigned SReg = Src2->getReg();
389  MRI.setRegAllocationHint(SReg, 0, AMDGPU::VCC);
390  continue;
391  }
392  if (SReg != AMDGPU::VCC)
393  continue;
394  }
395 
396  // Check for the bool flag output for instructions like V_ADD_I32_e64.
397  const MachineOperand *SDst = TII->getNamedOperand(MI,
398  AMDGPU::OpName::sdst);
399 
400  // Check the carry-in operand for v_addc_u32_e64.
401  const MachineOperand *Src2 = TII->getNamedOperand(MI,
402  AMDGPU::OpName::src2);
403 
404  if (SDst) {
405  if (SDst->getReg() != AMDGPU::VCC) {
407  MRI.setRegAllocationHint(SDst->getReg(), 0, AMDGPU::VCC);
408  continue;
409  }
410 
411  // All of the instructions with carry outs also have an SGPR input in
412  // src2.
413  if (Src2 && Src2->getReg() != AMDGPU::VCC) {
415  MRI.setRegAllocationHint(Src2->getReg(), 0, AMDGPU::VCC);
416 
417  continue;
418  }
419  }
420 
421  // We can shrink this instruction
422  LLVM_DEBUG(dbgs() << "Shrinking " << MI);
423 
424  MachineInstr *Inst32 = TII->buildShrunkInst(MI, Op32);
425  ++NumInstructionsShrunk;
426 
427  // Copy extra operands not present in the instruction definition.
428  copyExtraImplicitOps(*Inst32, MF, MI);
429 
430  MI.eraseFromParent();
431  foldImmediates(*Inst32, TII, MRI);
432 
433  LLVM_DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
434  }
435  }
436  return false;
437 }
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
unsigned getNumImplicitUses() const
Return the number of implicit uses this instruction has.
Definition: MCInstrDesc.h:521
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:349
static bool isReverseInlineImm(const SIInstrInfo *TII, const MachineOperand &Src, int32_t &ReverseImm)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
AMDGPU specific subclass of TargetSubtarget.
unsigned getNumImplicitDefs() const
Return the number of implicit defs this instruction has.
Definition: MCInstrDesc.h:543
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
static bool sopkIsZext(const MachineInstr &MI)
Definition: SIInstrInfo.h:551
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:163
unsigned getReg() const
getReg - Returns the register number.
unsigned getOperandNo(const_mop_iterator I) const
Returns the number of the operand iterator I points to.
Definition: MachineInstr.h:509
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
unsigned Reg
const SIInstrInfo * getInstrInfo() const override
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:306
bool isInlineConstant(const APInt &Imm) const
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction...
Definition: MachineInstr.h:700
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:210
const HexagonInstrInfo * TII
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:412
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:409
static bool isKImmOrKUImmOperand(const SIInstrInfo *TII, const MachineOperand &Src, bool &IsUnsigned)
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:406
STATISTIC(NumInstructionsShrunk, "Number of 64-bit instruction reduced to 32-bit.")
#define DEBUG_TYPE
The pass tries to use the 32-bit encoding for instructions when possible.
static bool isKUImmOperand(const SIInstrInfo *TII, const MachineOperand &Src)
unsigned const MachineRegisterInfo * MRI
static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
bool isCompare(QueryType Type=IgnoreBundle) const
Return true if this instruction is a comparison.
Definition: MachineInstr.h:694
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Represent the analysis usage information of a pass.
LLVM_READONLY int getSOPKOp(uint16_t Opcode)
void setImm(int64_t immVal)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
static bool isVOP2(const MachineInstr &MI)
Definition: SIInstrInfo.h:377
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:34
Iterator for intrusive lists based on ilist_node.
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:309
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_READONLY int getVOPe32(uint16_t Opcode)
MachineOperand class - Representation of each machine instruction operand.
void setRegAllocationHint(unsigned VReg, unsigned Type, unsigned PrefReg)
setRegAllocationHint - Specify a register allocation hint for the specified virtual register...
static bool isKImmOperand(const SIInstrInfo *TII, const MachineOperand &Src)
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:286
int64_t getImm() const
MachineInstr * getUniqueVRegDef(unsigned Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:924
bool use_empty(unsigned RegNo) const
use_empty - Return true if there are no instructions using the specified register.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
Definition: MachineInstr.h:64
static void copyExtraImplicitOps(MachineInstr &NewMI, MachineFunction &MF, const MachineInstr &MI)
Copy implicit register operands from specified instruction to this instruction that are not part of t...
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Interface definition for SIInstrInfo.
FunctionPass * createSIShrinkInstructionsPass()
bool hasOneUse(unsigned RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
static bool isVOPC(const MachineInstr &MI)
Definition: SIInstrInfo.h:401
#define I(x, y, z)
Definition: MD5.cpp:58
void setSubReg(unsigned subReg)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:346
bool isReg() const
isReg - Tests if this is a MO_Register operand.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
#define LLVM_DEBUG(X)
Definition: Debug.h:123
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:414
static bool isVOP1(const MachineInstr &MI)
Definition: SIInstrInfo.h:369
bool isCommutable(QueryType Type=IgnoreBundle) const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z, ..."), which produces the same result if Y and Z are exchanged.
Definition: MachineInstr.h:848
static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII, MachineRegisterInfo &MRI, bool TryToCommute=true)
This function checks MI for operands defined by a move immediate instruction and then folds the liter...
bool isImplicit() const
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.