LLVM  4.0.0
BypassSlowDivision.cpp
Go to the documentation of this file.
1 //===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains an optimization for div and rem on architectures that
11 // execute short instructions significantly faster than longer instructions.
12 // For example, on Intel Atom 32-bit divides are slow enough that during
13 // runtime it is profitable to check the value of the operands, and if they are
14 // positive and less than 256 use an unsigned 8-bit divide.
15 //
16 //===----------------------------------------------------------------------===//
17 
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/IR/Function.h"
21 #include "llvm/IR/IRBuilder.h"
22 #include "llvm/IR/Instructions.h"
24 
25 using namespace llvm;
26 
27 #define DEBUG_TYPE "bypass-slow-division"
28 
29 namespace {
30  struct DivOpInfo {
31  bool SignedOp;
32  Value *Dividend;
33  Value *Divisor;
34 
35  DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
36  : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
37  };
38 
39  struct DivPhiNodes {
40  PHINode *Quotient;
41  PHINode *Remainder;
42 
43  DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
44  : Quotient(InQuotient), Remainder(InRemainder) {}
45  };
46 }
47 
48 namespace llvm {
49  template<>
50  struct DenseMapInfo<DivOpInfo> {
51  static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
52  return Val1.SignedOp == Val2.SignedOp &&
53  Val1.Dividend == Val2.Dividend &&
54  Val1.Divisor == Val2.Divisor;
55  }
56 
57  static DivOpInfo getEmptyKey() {
58  return DivOpInfo(false, nullptr, nullptr);
59  }
60 
61  static DivOpInfo getTombstoneKey() {
62  return DivOpInfo(true, nullptr, nullptr);
63  }
64 
65  static unsigned getHashValue(const DivOpInfo &Val) {
66  return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
67  reinterpret_cast<uintptr_t>(Val.Divisor)) ^
68  (unsigned)Val.SignedOp;
69  }
70  };
71 
73 }
74 
75 // insertFastDiv - Substitutes the div/rem instruction with code that checks the
76 // value of the operands and uses a shorter-faster div/rem instruction when
77 // possible and the longer-slower div/rem instruction otherwise.
78 static bool insertFastDiv(Instruction *I, IntegerType *BypassType,
79  bool UseDivOp, bool UseSignedOp,
80  DivCacheTy &PerBBDivCache) {
81  Function *F = I->getParent()->getParent();
82  // Get instruction operands
83  Value *Dividend = I->getOperand(0);
84  Value *Divisor = I->getOperand(1);
85 
86  if (isa<ConstantInt>(Divisor)) {
87  // Division by a constant should have been been solved and replaced earlier
88  // in the pipeline.
89  return false;
90  }
91 
92  // If the numerator is a constant, bail if it doesn't fit into BypassType.
93  if (ConstantInt *ConstDividend = dyn_cast<ConstantInt>(Dividend))
94  if (ConstDividend->getValue().getActiveBits() > BypassType->getBitWidth())
95  return false;
96 
97  // Basic Block is split before divide
98  BasicBlock *MainBB = &*I->getParent();
99  BasicBlock *SuccessorBB = MainBB->splitBasicBlock(I);
100 
101  // Add new basic block for slow divide operation
102  BasicBlock *SlowBB =
103  BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB);
104  SlowBB->moveBefore(SuccessorBB);
105  IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
106  Value *SlowQuotientV;
107  Value *SlowRemainderV;
108  if (UseSignedOp) {
109  SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
110  SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
111  } else {
112  SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
113  SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
114  }
115  SlowBuilder.CreateBr(SuccessorBB);
116 
117  // Add new basic block for fast divide operation
118  BasicBlock *FastBB =
119  BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB);
120  FastBB->moveBefore(SlowBB);
121  IRBuilder<> FastBuilder(FastBB, FastBB->begin());
122  Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
123  BypassType);
124  Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
125  BypassType);
126 
127  // udiv/urem because optimization only handles positive numbers
128  Value *ShortQuotientV = FastBuilder.CreateUDiv(ShortDividendV, ShortDivisorV);
129  Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
130  ShortDivisorV);
131  Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
132  ShortQuotientV,
133  Dividend->getType());
134  Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
135  ShortRemainderV,
136  Dividend->getType());
137  FastBuilder.CreateBr(SuccessorBB);
138 
139  // Phi nodes for result of div and rem
140  IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
141  PHINode *QuoPhi = SuccessorBuilder.CreatePHI(I->getType(), 2);
142  QuoPhi->addIncoming(SlowQuotientV, SlowBB);
143  QuoPhi->addIncoming(FastQuotientV, FastBB);
144  PHINode *RemPhi = SuccessorBuilder.CreatePHI(I->getType(), 2);
145  RemPhi->addIncoming(SlowRemainderV, SlowBB);
146  RemPhi->addIncoming(FastRemainderV, FastBB);
147 
148  // Replace I with appropriate phi node
149  if (UseDivOp)
150  I->replaceAllUsesWith(QuoPhi);
151  else
152  I->replaceAllUsesWith(RemPhi);
153  I->eraseFromParent();
154 
155  // Combine operands into a single value with OR for value testing below
156  MainBB->getInstList().back().eraseFromParent();
157  IRBuilder<> MainBuilder(MainBB, MainBB->end());
158 
159  // We should have bailed out above if the divisor is a constant, but the
160  // dividend may still be a constant. Set OrV to our non-constant operands
161  // OR'ed together.
162  assert(!isa<ConstantInt>(Divisor));
163 
164  Value *OrV;
165  if (!isa<ConstantInt>(Dividend))
166  OrV = MainBuilder.CreateOr(Dividend, Divisor);
167  else
168  OrV = Divisor;
169 
170  // BitMask is inverted to check if the operands are
171  // larger than the bypass type
172  uint64_t BitMask = ~BypassType->getBitMask();
173  Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
174 
175  // Compare operand values and branch
176  Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
177  Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
178  MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
179 
180  // Cache phi nodes to be used later in place of other instances
181  // of div or rem with the same sign, dividend, and divisor
182  DivOpInfo Key(UseSignedOp, Dividend, Divisor);
183  DivPhiNodes Value(QuoPhi, RemPhi);
184  PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
185  return true;
186 }
187 
188 // reuseOrInsertFastDiv - Reuses previously computed dividend or remainder from
189 // the current BB if operands and operation are identical. Otherwise calls
190 // insertFastDiv to perform the optimization and caches the resulting dividend
191 // and remainder.
192 static bool reuseOrInsertFastDiv(Instruction *I, IntegerType *BypassType,
193  bool UseDivOp, bool UseSignedOp,
194  DivCacheTy &PerBBDivCache) {
195  // Get instruction operands
196  DivOpInfo Key(UseSignedOp, I->getOperand(0), I->getOperand(1));
197  DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
198 
199  if (CacheI == PerBBDivCache.end()) {
200  // If previous instance does not exist, insert fast div
201  return insertFastDiv(I, BypassType, UseDivOp, UseSignedOp, PerBBDivCache);
202  }
203 
204  // Replace operation value with previously generated phi node
205  DivPhiNodes &Value = CacheI->second;
206  if (UseDivOp) {
207  // Replace all uses of div instruction with quotient phi node
208  I->replaceAllUsesWith(Value.Quotient);
209  } else {
210  // Replace all uses of rem instruction with remainder phi node
211  I->replaceAllUsesWith(Value.Remainder);
212  }
213 
214  // Remove redundant operation
215  I->eraseFromParent();
216  return true;
217 }
218 
219 // bypassSlowDivision - This optimization identifies DIV instructions in a BB
220 // that can be profitably bypassed and carried out with a shorter, faster
221 // divide.
223  BasicBlock *BB, const DenseMap<unsigned int, unsigned int> &BypassWidths) {
224  DivCacheTy DivCache;
225 
226  bool MadeChange = false;
227  Instruction* Next = &*BB->begin();
228  while (Next != nullptr) {
229  // We may add instructions immediately after I, but we want to skip over
230  // them.
231  Instruction* I = Next;
232  Next = Next->getNextNode();
233 
234  // Get instruction details
235  unsigned Opcode = I->getOpcode();
236  bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
237  bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
238  bool UseSignedOp = Opcode == Instruction::SDiv ||
239  Opcode == Instruction::SRem;
240 
241  // Only optimize div or rem ops
242  if (!UseDivOp && !UseRemOp)
243  continue;
244 
245  // Skip division on vector types, only optimize integer instructions
246  if (!I->getType()->isIntegerTy())
247  continue;
248 
249  // Get bitwidth of div/rem instruction
250  IntegerType *T = cast<IntegerType>(I->getType());
251  unsigned int bitwidth = T->getBitWidth();
252 
253  // Continue if bitwidth is not bypassed
255  if (BI == BypassWidths.end())
256  continue;
257 
258  // Get type for div/rem instruction with bypass bitwidth
259  IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
260 
261  MadeChange |= reuseOrInsertFastDiv(I, BT, UseDivOp, UseSignedOp, DivCache);
262  }
263 
264  // Above we eagerly create divs and rems, as pairs, so that we can efficiently
265  // create divrem machine instructions. Now erase any unused divs / rems so we
266  // don't leave extra instructions sitting around.
267  for (auto &KV : DivCache)
268  for (Instruction *Phi : {KV.second.Quotient, KV.second.Remainder})
270 
271  return MadeChange;
272 }
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:76
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:226
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:274
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:100
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:65
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:228
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:172
bool bypassSlowDivision(BasicBlock *BB, const DenseMap< unsigned int, unsigned int > &BypassWidth)
This optimization identifies DIV instructions in a BB that can be profitably bypassed and carried out...
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:588
#define F(x, y, z)
Definition: MD5.cpp:51
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:401
LLVM Basic Block Representation.
Definition: BasicBlock.h:51
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:249
Value * getOperand(unsigned i) const
Definition: User.h:145
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:93
Class to represent integer types.
Definition: DerivedTypes.h:39
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr)
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:355
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:654
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:234
This is the shared class of boolean and integer constants.
Definition: Constants.h:88
iterator end()
Definition: BasicBlock.h:230
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:230
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.cpp:572
Value * CreateSDiv(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:891
Value * CreateUDiv(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:879
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:195
uint64_t getBitMask() const
Return a bitmask with ones set for all of the bits that can be set by an unsigned version of this typ...
Definition: DerivedTypes.h:69
static bool insertFastDiv(Instruction *I, IntegerType *BypassType, bool UseDivOp, bool UseSignedOp, DivCacheTy &PerBBDivCache)
DenseMap< DivOpInfo, DivPhiNodes > DivCacheTy
#define I(x, y, z)
Definition: MD5.cpp:54
iterator end()
Definition: DenseMap.h:69
iterator find(const KeyT &Val)
Definition: DenseMap.h:127
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:374
static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned getHashValue(const DivOpInfo &Val)
LLVM Value Representation.
Definition: Value.h:71
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:111
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition: BasicBlock.cpp:103
const BasicBlock * getParent() const
Definition: Instruction.h:62
static bool reuseOrInsertFastDiv(Instruction *I, IntegerType *BypassType, bool UseDivOp, bool UseSignedOp, DivCacheTy &PerBBDivCache)