LLVM API Documentation

BypassSlowDivision.cpp
Go to the documentation of this file.
00001 //===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file contains an optimization for div and rem on architectures that
00011 // execute short instructions significantly faster than longer instructions.
00012 // For example, on Intel Atom 32-bit divides are slow enough that during
00013 // runtime it is profitable to check the value of the operands, and if they are
00014 // positive and less than 256 use an unsigned 8-bit divide.
00015 //
00016 //===----------------------------------------------------------------------===//
00017 
00018 #define DEBUG_TYPE "bypass-slow-division"
00019 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
00020 #include "llvm/ADT/DenseMap.h"
00021 #include "llvm/IR/Function.h"
00022 #include "llvm/IR/IRBuilder.h"
00023 #include "llvm/IR/Instructions.h"
00024 
00025 using namespace llvm;
00026 
00027 namespace {
00028   struct DivOpInfo {
00029     bool SignedOp;
00030     Value *Dividend;
00031     Value *Divisor;
00032 
00033     DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
00034       : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
00035   };
00036 
00037   struct DivPhiNodes {
00038     PHINode *Quotient;
00039     PHINode *Remainder;
00040 
00041     DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
00042       : Quotient(InQuotient), Remainder(InRemainder) {}
00043   };
00044 }
00045 
00046 namespace llvm {
00047   template<>
00048   struct DenseMapInfo<DivOpInfo> {
00049     static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
00050       return Val1.SignedOp == Val2.SignedOp &&
00051              Val1.Dividend == Val2.Dividend &&
00052              Val1.Divisor == Val2.Divisor;
00053     }
00054 
00055     static DivOpInfo getEmptyKey() {
00056       return DivOpInfo(false, 0, 0);
00057     }
00058 
00059     static DivOpInfo getTombstoneKey() {
00060       return DivOpInfo(true, 0, 0);
00061     }
00062 
00063     static unsigned getHashValue(const DivOpInfo &Val) {
00064       return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
00065                         reinterpret_cast<uintptr_t>(Val.Divisor)) ^
00066                         (unsigned)Val.SignedOp;
00067     }
00068   };
00069 
00070   typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
00071 }
00072 
00073 // insertFastDiv - Substitutes the div/rem instruction with code that checks the
00074 // value of the operands and uses a shorter-faster div/rem instruction when
00075 // possible and the longer-slower div/rem instruction otherwise.
00076 static bool insertFastDiv(Function &F,
00077                           Function::iterator &I,
00078                           BasicBlock::iterator &J,
00079                           IntegerType *BypassType,
00080                           bool UseDivOp,
00081                           bool UseSignedOp,
00082                           DivCacheTy &PerBBDivCache) {
00083   // Get instruction operands
00084   Instruction *Instr = J;
00085   Value *Dividend = Instr->getOperand(0);
00086   Value *Divisor = Instr->getOperand(1);
00087 
00088   if (isa<ConstantInt>(Divisor) ||
00089       (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) {
00090     // Operations with immediate values should have
00091     // been solved and replaced during compile time.
00092     return false;
00093   }
00094 
00095   // Basic Block is split before divide
00096   BasicBlock *MainBB = I;
00097   BasicBlock *SuccessorBB = I->splitBasicBlock(J);
00098   ++I; //advance iterator I to successorBB
00099 
00100   // Add new basic block for slow divide operation
00101   BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "",
00102                                           MainBB->getParent(), SuccessorBB);
00103   SlowBB->moveBefore(SuccessorBB);
00104   IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
00105   Value *SlowQuotientV;
00106   Value *SlowRemainderV;
00107   if (UseSignedOp) {
00108     SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
00109     SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
00110   } else {
00111     SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
00112     SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
00113   }
00114   SlowBuilder.CreateBr(SuccessorBB);
00115 
00116   // Add new basic block for fast divide operation
00117   BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "",
00118                                           MainBB->getParent(), SuccessorBB);
00119   FastBB->moveBefore(SlowBB);
00120   IRBuilder<> FastBuilder(FastBB, FastBB->begin());
00121   Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
00122                                                 BypassType);
00123   Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
00124                                                  BypassType);
00125 
00126   // udiv/urem because optimization only handles positive numbers
00127   Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV,
00128                                                       ShortDivisorV);
00129   Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
00130                                                   ShortDivisorV);
00131   Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
00132                                                 ShortQuotientV,
00133                                                 Dividend->getType());
00134   Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
00135                                                  ShortRemainderV,
00136                                                  Dividend->getType());
00137   FastBuilder.CreateBr(SuccessorBB);
00138 
00139   // Phi nodes for result of div and rem
00140   IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
00141   PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
00142   QuoPhi->addIncoming(SlowQuotientV, SlowBB);
00143   QuoPhi->addIncoming(FastQuotientV, FastBB);
00144   PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
00145   RemPhi->addIncoming(SlowRemainderV, SlowBB);
00146   RemPhi->addIncoming(FastRemainderV, FastBB);
00147 
00148   // Replace Instr with appropriate phi node
00149   if (UseDivOp)
00150     Instr->replaceAllUsesWith(QuoPhi);
00151   else
00152     Instr->replaceAllUsesWith(RemPhi);
00153   Instr->eraseFromParent();
00154 
00155   // Combine operands into a single value with OR for value testing below
00156   MainBB->getInstList().back().eraseFromParent();
00157   IRBuilder<> MainBuilder(MainBB, MainBB->end());
00158   Value *OrV = MainBuilder.CreateOr(Dividend, Divisor);
00159 
00160   // BitMask is inverted to check if the operands are
00161   // larger than the bypass type
00162   uint64_t BitMask = ~BypassType->getBitMask();
00163   Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
00164 
00165   // Compare operand values and branch
00166   Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
00167   Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
00168   MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
00169 
00170   // point iterator J at first instruction of successorBB
00171   J = I->begin();
00172 
00173   // Cache phi nodes to be used later in place of other instances
00174   // of div or rem with the same sign, dividend, and divisor
00175   DivOpInfo Key(UseSignedOp, Dividend, Divisor);
00176   DivPhiNodes Value(QuoPhi, RemPhi);
00177   PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
00178   return true;
00179 }
00180 
00181 // reuseOrInsertFastDiv - Reuses previously computed dividend or remainder if
00182 // operands and operation are identical. Otherwise call insertFastDiv to perform
00183 // the optimization and cache the resulting dividend and remainder.
00184 static bool reuseOrInsertFastDiv(Function &F,
00185                                  Function::iterator &I,
00186                                  BasicBlock::iterator &J,
00187                                  IntegerType *BypassType,
00188                                  bool UseDivOp,
00189                                  bool UseSignedOp,
00190                                  DivCacheTy &PerBBDivCache) {
00191   // Get instruction operands
00192   Instruction *Instr = J;
00193   DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1));
00194   DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
00195 
00196   if (CacheI == PerBBDivCache.end()) {
00197     // If previous instance does not exist, insert fast div
00198     return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp,
00199                          PerBBDivCache);
00200   }
00201 
00202   // Replace operation value with previously generated phi node
00203   DivPhiNodes &Value = CacheI->second;
00204   if (UseDivOp) {
00205     // Replace all uses of div instruction with quotient phi node
00206     J->replaceAllUsesWith(Value.Quotient);
00207   } else {
00208     // Replace all uses of rem instruction with remainder phi node
00209     J->replaceAllUsesWith(Value.Remainder);
00210   }
00211 
00212   // Advance to next operation
00213   ++J;
00214 
00215   // Remove redundant operation
00216   Instr->eraseFromParent();
00217   return true;
00218 }
00219 
00220 // bypassSlowDivision - This optimization identifies DIV instructions that can
00221 // be profitably bypassed and carried out with a shorter, faster divide.
00222 bool llvm::bypassSlowDivision(Function &F,
00223                               Function::iterator &I,
00224                               const DenseMap<unsigned int, unsigned int> &BypassWidths) {
00225   DivCacheTy DivCache;
00226 
00227   bool MadeChange = false;
00228   for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) {
00229 
00230     // Get instruction details
00231     unsigned Opcode = J->getOpcode();
00232     bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
00233     bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
00234     bool UseSignedOp = Opcode == Instruction::SDiv ||
00235                        Opcode == Instruction::SRem;
00236 
00237     // Only optimize div or rem ops
00238     if (!UseDivOp && !UseRemOp)
00239       continue;
00240 
00241     // Skip division on vector types, only optimize integer instructions
00242     if (!J->getType()->isIntegerTy())
00243       continue;
00244 
00245     // Get bitwidth of div/rem instruction
00246     IntegerType *T = cast<IntegerType>(J->getType());
00247     unsigned int bitwidth = T->getBitWidth();
00248 
00249     // Continue if bitwidth is not bypassed
00250     DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth);
00251     if (BI == BypassWidths.end())
00252       continue;
00253 
00254     // Get type for div/rem instruction with bypass bitwidth
00255     IntegerType *BT = IntegerType::get(J->getContext(), BI->second);
00256 
00257     MadeChange |= reuseOrInsertFastDiv(F, I, J, BT, UseDivOp,
00258                                        UseSignedOp, DivCache);
00259   }
00260 
00261   return MadeChange;
00262 }