LLVM  3.7.0
BypassSlowDivision.cpp
Go to the documentation of this file.
1 //===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains an optimization for div and rem on architectures that
11 // execute short instructions significantly faster than longer instructions.
12 // For example, on Intel Atom 32-bit divides are slow enough that during
13 // runtime it is profitable to check the value of the operands, and if they are
14 // positive and less than 256 use an unsigned 8-bit divide.
15 //
16 //===----------------------------------------------------------------------===//
17 
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/IR/Function.h"
21 #include "llvm/IR/IRBuilder.h"
22 #include "llvm/IR/Instructions.h"
23 
24 using namespace llvm;
25 
26 #define DEBUG_TYPE "bypass-slow-division"
27 
28 namespace {
29  struct DivOpInfo {
30  bool SignedOp;
31  Value *Dividend;
32  Value *Divisor;
33 
34  DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
35  : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
36  };
37 
38  struct DivPhiNodes {
39  PHINode *Quotient;
40  PHINode *Remainder;
41 
42  DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
43  : Quotient(InQuotient), Remainder(InRemainder) {}
44  };
45 }
46 
47 namespace llvm {
48  template<>
49  struct DenseMapInfo<DivOpInfo> {
50  static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
51  return Val1.SignedOp == Val2.SignedOp &&
52  Val1.Dividend == Val2.Dividend &&
53  Val1.Divisor == Val2.Divisor;
54  }
55 
56  static DivOpInfo getEmptyKey() {
57  return DivOpInfo(false, nullptr, nullptr);
58  }
59 
60  static DivOpInfo getTombstoneKey() {
61  return DivOpInfo(true, nullptr, nullptr);
62  }
63 
64  static unsigned getHashValue(const DivOpInfo &Val) {
65  return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
66  reinterpret_cast<uintptr_t>(Val.Divisor)) ^
67  (unsigned)Val.SignedOp;
68  }
69  };
70 
72 }
73 
74 // insertFastDiv - Substitutes the div/rem instruction with code that checks the
75 // value of the operands and uses a shorter-faster div/rem instruction when
76 // possible and the longer-slower div/rem instruction otherwise.
77 static bool insertFastDiv(Function &F,
80  IntegerType *BypassType,
81  bool UseDivOp,
82  bool UseSignedOp,
83  DivCacheTy &PerBBDivCache) {
84  // Get instruction operands
85  Instruction *Instr = J;
86  Value *Dividend = Instr->getOperand(0);
87  Value *Divisor = Instr->getOperand(1);
88 
89  if (isa<ConstantInt>(Divisor) ||
90  (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) {
91  // Operations with immediate values should have
92  // been solved and replaced during compile time.
93  return false;
94  }
95 
96  // Basic Block is split before divide
97  BasicBlock *MainBB = I;
98  BasicBlock *SuccessorBB = I->splitBasicBlock(J);
99  ++I; //advance iterator I to successorBB
100 
101  // Add new basic block for slow divide operation
102  BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "",
103  MainBB->getParent(), SuccessorBB);
104  SlowBB->moveBefore(SuccessorBB);
105  IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
106  Value *SlowQuotientV;
107  Value *SlowRemainderV;
108  if (UseSignedOp) {
109  SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
110  SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
111  } else {
112  SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
113  SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
114  }
115  SlowBuilder.CreateBr(SuccessorBB);
116 
117  // Add new basic block for fast divide operation
118  BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "",
119  MainBB->getParent(), SuccessorBB);
120  FastBB->moveBefore(SlowBB);
121  IRBuilder<> FastBuilder(FastBB, FastBB->begin());
122  Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
123  BypassType);
124  Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
125  BypassType);
126 
127  // udiv/urem because optimization only handles positive numbers
128  Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV,
129  ShortDivisorV);
130  Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
131  ShortDivisorV);
132  Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
133  ShortQuotientV,
134  Dividend->getType());
135  Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
136  ShortRemainderV,
137  Dividend->getType());
138  FastBuilder.CreateBr(SuccessorBB);
139 
140  // Phi nodes for result of div and rem
141  IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
142  PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
143  QuoPhi->addIncoming(SlowQuotientV, SlowBB);
144  QuoPhi->addIncoming(FastQuotientV, FastBB);
145  PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
146  RemPhi->addIncoming(SlowRemainderV, SlowBB);
147  RemPhi->addIncoming(FastRemainderV, FastBB);
148 
149  // Replace Instr with appropriate phi node
150  if (UseDivOp)
151  Instr->replaceAllUsesWith(QuoPhi);
152  else
153  Instr->replaceAllUsesWith(RemPhi);
154  Instr->eraseFromParent();
155 
156  // Combine operands into a single value with OR for value testing below
157  MainBB->getInstList().back().eraseFromParent();
158  IRBuilder<> MainBuilder(MainBB, MainBB->end());
159  Value *OrV = MainBuilder.CreateOr(Dividend, Divisor);
160 
161  // BitMask is inverted to check if the operands are
162  // larger than the bypass type
163  uint64_t BitMask = ~BypassType->getBitMask();
164  Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
165 
166  // Compare operand values and branch
167  Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
168  Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
169  MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
170 
171  // point iterator J at first instruction of successorBB
172  J = I->begin();
173 
174  // Cache phi nodes to be used later in place of other instances
175  // of div or rem with the same sign, dividend, and divisor
176  DivOpInfo Key(UseSignedOp, Dividend, Divisor);
177  DivPhiNodes Value(QuoPhi, RemPhi);
178  PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
179  return true;
180 }
181 
182 // reuseOrInsertFastDiv - Reuses previously computed dividend or remainder if
183 // operands and operation are identical. Otherwise call insertFastDiv to perform
184 // the optimization and cache the resulting dividend and remainder.
188  IntegerType *BypassType,
189  bool UseDivOp,
190  bool UseSignedOp,
191  DivCacheTy &PerBBDivCache) {
192  // Get instruction operands
193  Instruction *Instr = J;
194  DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1));
195  DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
196 
197  if (CacheI == PerBBDivCache.end()) {
198  // If previous instance does not exist, insert fast div
199  return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp,
200  PerBBDivCache);
201  }
202 
203  // Replace operation value with previously generated phi node
204  DivPhiNodes &Value = CacheI->second;
205  if (UseDivOp) {
206  // Replace all uses of div instruction with quotient phi node
207  J->replaceAllUsesWith(Value.Quotient);
208  } else {
209  // Replace all uses of rem instruction with remainder phi node
210  J->replaceAllUsesWith(Value.Remainder);
211  }
212 
213  // Advance to next operation
214  ++J;
215 
216  // Remove redundant operation
217  Instr->eraseFromParent();
218  return true;
219 }
220 
221 // bypassSlowDivision - This optimization identifies DIV instructions that can
222 // be profitably bypassed and carried out with a shorter, faster divide.
225  const DenseMap<unsigned int, unsigned int> &BypassWidths) {
226  DivCacheTy DivCache;
227 
228  bool MadeChange = false;
229  for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) {
230 
231  // Get instruction details
232  unsigned Opcode = J->getOpcode();
233  bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
234  bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
235  bool UseSignedOp = Opcode == Instruction::SDiv ||
236  Opcode == Instruction::SRem;
237 
238  // Only optimize div or rem ops
239  if (!UseDivOp && !UseRemOp)
240  continue;
241 
242  // Skip division on vector types, only optimize integer instructions
243  if (!J->getType()->isIntegerTy())
244  continue;
245 
246  // Get bitwidth of div/rem instruction
247  IntegerType *T = cast<IntegerType>(J->getType());
248  unsigned int bitwidth = T->getBitWidth();
249 
250  // Continue if bitwidth is not bypassed
252  if (BI == BypassWidths.end())
253  continue;
254 
255  // Get type for div/rem instruction with bypass bitwidth
256  IntegerType *BT = IntegerType::get(J->getContext(), BI->second);
257 
258  MadeChange |= reuseOrInsertFastDiv(F, I, J, BT, UseDivOp,
259  UseSignedOp, DivCache);
260  }
261 
262  return MadeChange;
263 }
iplist< Instruction >::iterator eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing basic block and deletes it...
Definition: Instruction.cpp:70
void addIncoming(Value *V, BasicBlock *BB)
addIncoming - Add an incoming value to the end of the PHI list
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:223
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:111
F(f)
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:61
static bool insertFastDiv(Function &F, Function::iterator &I, BasicBlock::iterator &J, IntegerType *BypassType, bool UseDivOp, bool UseSignedOp, DivCacheTy &PerBBDivCache)
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:231
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:169
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:517
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:351
LLVM Basic Block Representation.
Definition: BasicBlock.h:65
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:252
Value * getOperand(unsigned i) const
Definition: User.h:118
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:103
Class to represent integer types.
Definition: DerivedTypes.h:37
static bool reuseOrInsertFastDiv(Function &F, Function::iterator &I, BasicBlock::iterator &J, IntegerType *BypassType, bool UseDivOp, bool UseSignedOp, DivCacheTy &PerBBDivCache)
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:304
iterator end()
Definition: BasicBlock.h:233
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:222
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.cpp:597
Value * CreateSDiv(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:782
Value * CreateUDiv(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:770
uint64_t getBitMask() const
getBitMask - Return a bitmask with ones set for all of the bits that can be set by an unsigned versio...
Definition: DerivedTypes.h:66
DenseMap< DivOpInfo, DivPhiNodes > DivCacheTy
#define I(x, y, z)
Definition: MD5.cpp:54
iterator end()
Definition: DenseMap.h:68
iterator find(const KeyT &Val)
Definition: DenseMap.h:124
bool bypassSlowDivision(Function &F, Function::iterator &I, const DenseMap< unsigned int, unsigned int > &BypassWidth)
This optimization identifies DIV instructions that can be profitably bypassed and carried out with a ...
static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2)
reference back()
Definition: ilist.h:398
static unsigned getHashValue(const DivOpInfo &Val)
LLVM Value Representation.
Definition: Value.h:69
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition: BasicBlock.cpp:103