LLVM API Documentation

BypassSlowDivision.cpp
Go to the documentation of this file.
00001 //===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file contains an optimization for div and rem on architectures that
00011 // execute short instructions significantly faster than longer instructions.
00012 // For example, on Intel Atom 32-bit divides are slow enough that during
00013 // runtime it is profitable to check the value of the operands, and if they are
00014 // positive and less than 256 use an unsigned 8-bit divide.
00015 //
00016 //===----------------------------------------------------------------------===//
00017 
00018 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
00019 #include "llvm/ADT/DenseMap.h"
00020 #include "llvm/IR/Function.h"
00021 #include "llvm/IR/IRBuilder.h"
00022 #include "llvm/IR/Instructions.h"
00023 
00024 using namespace llvm;
00025 
00026 #define DEBUG_TYPE "bypass-slow-division"
00027 
00028 namespace {
00029   struct DivOpInfo {
00030     bool SignedOp;
00031     Value *Dividend;
00032     Value *Divisor;
00033 
00034     DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
00035       : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
00036   };
00037 
00038   struct DivPhiNodes {
00039     PHINode *Quotient;
00040     PHINode *Remainder;
00041 
00042     DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
00043       : Quotient(InQuotient), Remainder(InRemainder) {}
00044   };
00045 }
00046 
00047 namespace llvm {
00048   template<>
00049   struct DenseMapInfo<DivOpInfo> {
00050     static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
00051       return Val1.SignedOp == Val2.SignedOp &&
00052              Val1.Dividend == Val2.Dividend &&
00053              Val1.Divisor == Val2.Divisor;
00054     }
00055 
00056     static DivOpInfo getEmptyKey() {
00057       return DivOpInfo(false, 0, 0);
00058     }
00059 
00060     static DivOpInfo getTombstoneKey() {
00061       return DivOpInfo(true, 0, 0);
00062     }
00063 
00064     static unsigned getHashValue(const DivOpInfo &Val) {
00065       return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
00066                         reinterpret_cast<uintptr_t>(Val.Divisor)) ^
00067                         (unsigned)Val.SignedOp;
00068     }
00069   };
00070 
00071   typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
00072 }
00073 
00074 // insertFastDiv - Substitutes the div/rem instruction with code that checks the
00075 // value of the operands and uses a shorter-faster div/rem instruction when
00076 // possible and the longer-slower div/rem instruction otherwise.
00077 static bool insertFastDiv(Function &F,
00078                           Function::iterator &I,
00079                           BasicBlock::iterator &J,
00080                           IntegerType *BypassType,
00081                           bool UseDivOp,
00082                           bool UseSignedOp,
00083                           DivCacheTy &PerBBDivCache) {
00084   // Get instruction operands
00085   Instruction *Instr = J;
00086   Value *Dividend = Instr->getOperand(0);
00087   Value *Divisor = Instr->getOperand(1);
00088 
00089   if (isa<ConstantInt>(Divisor) ||
00090       (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) {
00091     // Operations with immediate values should have
00092     // been solved and replaced during compile time.
00093     return false;
00094   }
00095 
00096   // Basic Block is split before divide
00097   BasicBlock *MainBB = I;
00098   BasicBlock *SuccessorBB = I->splitBasicBlock(J);
00099   ++I; //advance iterator I to successorBB
00100 
00101   // Add new basic block for slow divide operation
00102   BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "",
00103                                           MainBB->getParent(), SuccessorBB);
00104   SlowBB->moveBefore(SuccessorBB);
00105   IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
00106   Value *SlowQuotientV;
00107   Value *SlowRemainderV;
00108   if (UseSignedOp) {
00109     SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
00110     SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
00111   } else {
00112     SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
00113     SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
00114   }
00115   SlowBuilder.CreateBr(SuccessorBB);
00116 
00117   // Add new basic block for fast divide operation
00118   BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "",
00119                                           MainBB->getParent(), SuccessorBB);
00120   FastBB->moveBefore(SlowBB);
00121   IRBuilder<> FastBuilder(FastBB, FastBB->begin());
00122   Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
00123                                                 BypassType);
00124   Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
00125                                                  BypassType);
00126 
00127   // udiv/urem because optimization only handles positive numbers
00128   Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV,
00129                                                       ShortDivisorV);
00130   Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
00131                                                   ShortDivisorV);
00132   Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
00133                                                 ShortQuotientV,
00134                                                 Dividend->getType());
00135   Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
00136                                                  ShortRemainderV,
00137                                                  Dividend->getType());
00138   FastBuilder.CreateBr(SuccessorBB);
00139 
00140   // Phi nodes for result of div and rem
00141   IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
00142   PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
00143   QuoPhi->addIncoming(SlowQuotientV, SlowBB);
00144   QuoPhi->addIncoming(FastQuotientV, FastBB);
00145   PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
00146   RemPhi->addIncoming(SlowRemainderV, SlowBB);
00147   RemPhi->addIncoming(FastRemainderV, FastBB);
00148 
00149   // Replace Instr with appropriate phi node
00150   if (UseDivOp)
00151     Instr->replaceAllUsesWith(QuoPhi);
00152   else
00153     Instr->replaceAllUsesWith(RemPhi);
00154   Instr->eraseFromParent();
00155 
00156   // Combine operands into a single value with OR for value testing below
00157   MainBB->getInstList().back().eraseFromParent();
00158   IRBuilder<> MainBuilder(MainBB, MainBB->end());
00159   Value *OrV = MainBuilder.CreateOr(Dividend, Divisor);
00160 
00161   // BitMask is inverted to check if the operands are
00162   // larger than the bypass type
00163   uint64_t BitMask = ~BypassType->getBitMask();
00164   Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
00165 
00166   // Compare operand values and branch
00167   Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
00168   Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
00169   MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
00170 
00171   // point iterator J at first instruction of successorBB
00172   J = I->begin();
00173 
00174   // Cache phi nodes to be used later in place of other instances
00175   // of div or rem with the same sign, dividend, and divisor
00176   DivOpInfo Key(UseSignedOp, Dividend, Divisor);
00177   DivPhiNodes Value(QuoPhi, RemPhi);
00178   PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
00179   return true;
00180 }
00181 
00182 // reuseOrInsertFastDiv - Reuses previously computed dividend or remainder if
00183 // operands and operation are identical. Otherwise call insertFastDiv to perform
00184 // the optimization and cache the resulting dividend and remainder.
00185 static bool reuseOrInsertFastDiv(Function &F,
00186                                  Function::iterator &I,
00187                                  BasicBlock::iterator &J,
00188                                  IntegerType *BypassType,
00189                                  bool UseDivOp,
00190                                  bool UseSignedOp,
00191                                  DivCacheTy &PerBBDivCache) {
00192   // Get instruction operands
00193   Instruction *Instr = J;
00194   DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1));
00195   DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
00196 
00197   if (CacheI == PerBBDivCache.end()) {
00198     // If previous instance does not exist, insert fast div
00199     return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp,
00200                          PerBBDivCache);
00201   }
00202 
00203   // Replace operation value with previously generated phi node
00204   DivPhiNodes &Value = CacheI->second;
00205   if (UseDivOp) {
00206     // Replace all uses of div instruction with quotient phi node
00207     J->replaceAllUsesWith(Value.Quotient);
00208   } else {
00209     // Replace all uses of rem instruction with remainder phi node
00210     J->replaceAllUsesWith(Value.Remainder);
00211   }
00212 
00213   // Advance to next operation
00214   ++J;
00215 
00216   // Remove redundant operation
00217   Instr->eraseFromParent();
00218   return true;
00219 }
00220 
00221 // bypassSlowDivision - This optimization identifies DIV instructions that can
00222 // be profitably bypassed and carried out with a shorter, faster divide.
00223 bool llvm::bypassSlowDivision(Function &F,
00224                               Function::iterator &I,
00225                               const DenseMap<unsigned int, unsigned int> &BypassWidths) {
00226   DivCacheTy DivCache;
00227 
00228   bool MadeChange = false;
00229   for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) {
00230 
00231     // Get instruction details
00232     unsigned Opcode = J->getOpcode();
00233     bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
00234     bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
00235     bool UseSignedOp = Opcode == Instruction::SDiv ||
00236                        Opcode == Instruction::SRem;
00237 
00238     // Only optimize div or rem ops
00239     if (!UseDivOp && !UseRemOp)
00240       continue;
00241 
00242     // Skip division on vector types, only optimize integer instructions
00243     if (!J->getType()->isIntegerTy())
00244       continue;
00245 
00246     // Get bitwidth of div/rem instruction
00247     IntegerType *T = cast<IntegerType>(J->getType());
00248     unsigned int bitwidth = T->getBitWidth();
00249 
00250     // Continue if bitwidth is not bypassed
00251     DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth);
00252     if (BI == BypassWidths.end())
00253       continue;
00254 
00255     // Get type for div/rem instruction with bypass bitwidth
00256     IntegerType *BT = IntegerType::get(J->getContext(), BI->second);
00257 
00258     MadeChange |= reuseOrInsertFastDiv(F, I, J, BT, UseDivOp,
00259                                        UseSignedOp, DivCache);
00260   }
00261 
00262   return MadeChange;
00263 }