LLVM API Documentation
00001 //===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file contains an optimization for div and rem on architectures that 00011 // execute short instructions significantly faster than longer instructions. 00012 // For example, on Intel Atom 32-bit divides are slow enough that during 00013 // runtime it is profitable to check the value of the operands, and if they are 00014 // positive and less than 256 use an unsigned 8-bit divide. 00015 // 00016 //===----------------------------------------------------------------------===// 00017 00018 #define DEBUG_TYPE "bypass-slow-division" 00019 #include "llvm/Transforms/Utils/BypassSlowDivision.h" 00020 #include "llvm/ADT/DenseMap.h" 00021 #include "llvm/IR/Function.h" 00022 #include "llvm/IR/IRBuilder.h" 00023 #include "llvm/IR/Instructions.h" 00024 00025 using namespace llvm; 00026 00027 namespace { 00028 struct DivOpInfo { 00029 bool SignedOp; 00030 Value *Dividend; 00031 Value *Divisor; 00032 00033 DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor) 00034 : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {} 00035 }; 00036 00037 struct DivPhiNodes { 00038 PHINode *Quotient; 00039 PHINode *Remainder; 00040 00041 DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder) 00042 : Quotient(InQuotient), Remainder(InRemainder) {} 00043 }; 00044 } 00045 00046 namespace llvm { 00047 template<> 00048 struct DenseMapInfo<DivOpInfo> { 00049 static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) { 00050 return Val1.SignedOp == Val2.SignedOp && 00051 Val1.Dividend == Val2.Dividend && 00052 Val1.Divisor == Val2.Divisor; 00053 } 00054 00055 static DivOpInfo getEmptyKey() { 00056 return DivOpInfo(false, 0, 0); 00057 } 00058 00059 static DivOpInfo getTombstoneKey() { 00060 return DivOpInfo(true, 0, 0); 00061 } 00062 00063 static unsigned getHashValue(const DivOpInfo &Val) { 00064 return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^ 00065 reinterpret_cast<uintptr_t>(Val.Divisor)) ^ 00066 (unsigned)Val.SignedOp; 00067 } 00068 }; 00069 00070 typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy; 00071 } 00072 00073 // insertFastDiv - Substitutes the div/rem instruction with code that checks the 00074 // value of the operands and uses a shorter-faster div/rem instruction when 00075 // possible and the longer-slower div/rem instruction otherwise. 00076 static bool insertFastDiv(Function &F, 00077 Function::iterator &I, 00078 BasicBlock::iterator &J, 00079 IntegerType *BypassType, 00080 bool UseDivOp, 00081 bool UseSignedOp, 00082 DivCacheTy &PerBBDivCache) { 00083 // Get instruction operands 00084 Instruction *Instr = J; 00085 Value *Dividend = Instr->getOperand(0); 00086 Value *Divisor = Instr->getOperand(1); 00087 00088 if (isa<ConstantInt>(Divisor) || 00089 (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) { 00090 // Operations with immediate values should have 00091 // been solved and replaced during compile time. 00092 return false; 00093 } 00094 00095 // Basic Block is split before divide 00096 BasicBlock *MainBB = I; 00097 BasicBlock *SuccessorBB = I->splitBasicBlock(J); 00098 ++I; //advance iterator I to successorBB 00099 00100 // Add new basic block for slow divide operation 00101 BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "", 00102 MainBB->getParent(), SuccessorBB); 00103 SlowBB->moveBefore(SuccessorBB); 00104 IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin()); 00105 Value *SlowQuotientV; 00106 Value *SlowRemainderV; 00107 if (UseSignedOp) { 00108 SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor); 00109 SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor); 00110 } else { 00111 SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor); 00112 SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor); 00113 } 00114 SlowBuilder.CreateBr(SuccessorBB); 00115 00116 // Add new basic block for fast divide operation 00117 BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "", 00118 MainBB->getParent(), SuccessorBB); 00119 FastBB->moveBefore(SlowBB); 00120 IRBuilder<> FastBuilder(FastBB, FastBB->begin()); 00121 Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor, 00122 BypassType); 00123 Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend, 00124 BypassType); 00125 00126 // udiv/urem because optimization only handles positive numbers 00127 Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV, 00128 ShortDivisorV); 00129 Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV, 00130 ShortDivisorV); 00131 Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt, 00132 ShortQuotientV, 00133 Dividend->getType()); 00134 Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt, 00135 ShortRemainderV, 00136 Dividend->getType()); 00137 FastBuilder.CreateBr(SuccessorBB); 00138 00139 // Phi nodes for result of div and rem 00140 IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin()); 00141 PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2); 00142 QuoPhi->addIncoming(SlowQuotientV, SlowBB); 00143 QuoPhi->addIncoming(FastQuotientV, FastBB); 00144 PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2); 00145 RemPhi->addIncoming(SlowRemainderV, SlowBB); 00146 RemPhi->addIncoming(FastRemainderV, FastBB); 00147 00148 // Replace Instr with appropriate phi node 00149 if (UseDivOp) 00150 Instr->replaceAllUsesWith(QuoPhi); 00151 else 00152 Instr->replaceAllUsesWith(RemPhi); 00153 Instr->eraseFromParent(); 00154 00155 // Combine operands into a single value with OR for value testing below 00156 MainBB->getInstList().back().eraseFromParent(); 00157 IRBuilder<> MainBuilder(MainBB, MainBB->end()); 00158 Value *OrV = MainBuilder.CreateOr(Dividend, Divisor); 00159 00160 // BitMask is inverted to check if the operands are 00161 // larger than the bypass type 00162 uint64_t BitMask = ~BypassType->getBitMask(); 00163 Value *AndV = MainBuilder.CreateAnd(OrV, BitMask); 00164 00165 // Compare operand values and branch 00166 Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0); 00167 Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV); 00168 MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB); 00169 00170 // point iterator J at first instruction of successorBB 00171 J = I->begin(); 00172 00173 // Cache phi nodes to be used later in place of other instances 00174 // of div or rem with the same sign, dividend, and divisor 00175 DivOpInfo Key(UseSignedOp, Dividend, Divisor); 00176 DivPhiNodes Value(QuoPhi, RemPhi); 00177 PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value)); 00178 return true; 00179 } 00180 00181 // reuseOrInsertFastDiv - Reuses previously computed dividend or remainder if 00182 // operands and operation are identical. Otherwise call insertFastDiv to perform 00183 // the optimization and cache the resulting dividend and remainder. 00184 static bool reuseOrInsertFastDiv(Function &F, 00185 Function::iterator &I, 00186 BasicBlock::iterator &J, 00187 IntegerType *BypassType, 00188 bool UseDivOp, 00189 bool UseSignedOp, 00190 DivCacheTy &PerBBDivCache) { 00191 // Get instruction operands 00192 Instruction *Instr = J; 00193 DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1)); 00194 DivCacheTy::iterator CacheI = PerBBDivCache.find(Key); 00195 00196 if (CacheI == PerBBDivCache.end()) { 00197 // If previous instance does not exist, insert fast div 00198 return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp, 00199 PerBBDivCache); 00200 } 00201 00202 // Replace operation value with previously generated phi node 00203 DivPhiNodes &Value = CacheI->second; 00204 if (UseDivOp) { 00205 // Replace all uses of div instruction with quotient phi node 00206 J->replaceAllUsesWith(Value.Quotient); 00207 } else { 00208 // Replace all uses of rem instruction with remainder phi node 00209 J->replaceAllUsesWith(Value.Remainder); 00210 } 00211 00212 // Advance to next operation 00213 ++J; 00214 00215 // Remove redundant operation 00216 Instr->eraseFromParent(); 00217 return true; 00218 } 00219 00220 // bypassSlowDivision - This optimization identifies DIV instructions that can 00221 // be profitably bypassed and carried out with a shorter, faster divide. 00222 bool llvm::bypassSlowDivision(Function &F, 00223 Function::iterator &I, 00224 const DenseMap<unsigned int, unsigned int> &BypassWidths) { 00225 DivCacheTy DivCache; 00226 00227 bool MadeChange = false; 00228 for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) { 00229 00230 // Get instruction details 00231 unsigned Opcode = J->getOpcode(); 00232 bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv; 00233 bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem; 00234 bool UseSignedOp = Opcode == Instruction::SDiv || 00235 Opcode == Instruction::SRem; 00236 00237 // Only optimize div or rem ops 00238 if (!UseDivOp && !UseRemOp) 00239 continue; 00240 00241 // Skip division on vector types, only optimize integer instructions 00242 if (!J->getType()->isIntegerTy()) 00243 continue; 00244 00245 // Get bitwidth of div/rem instruction 00246 IntegerType *T = cast<IntegerType>(J->getType()); 00247 unsigned int bitwidth = T->getBitWidth(); 00248 00249 // Continue if bitwidth is not bypassed 00250 DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth); 00251 if (BI == BypassWidths.end()) 00252 continue; 00253 00254 // Get type for div/rem instruction with bypass bitwidth 00255 IntegerType *BT = IntegerType::get(J->getContext(), BI->second); 00256 00257 MadeChange |= reuseOrInsertFastDiv(F, I, J, BT, UseDivOp, 00258 UseSignedOp, DivCache); 00259 } 00260 00261 return MadeChange; 00262 }