LLVM  mainline
BypassSlowDivision.cpp
Go to the documentation of this file.
00001 //===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file contains an optimization for div and rem on architectures that
00011 // execute short instructions significantly faster than longer instructions.
00012 // For example, on Intel Atom 32-bit divides are slow enough that during
00013 // runtime it is profitable to check the value of the operands, and if they are
00014 // positive and less than 256 use an unsigned 8-bit divide.
00015 //
00016 //===----------------------------------------------------------------------===//
00017 
00018 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
00019 #include "llvm/ADT/DenseMap.h"
00020 #include "llvm/IR/Function.h"
00021 #include "llvm/IR/IRBuilder.h"
00022 #include "llvm/IR/Instructions.h"
00023 
00024 using namespace llvm;
00025 
00026 #define DEBUG_TYPE "bypass-slow-division"
00027 
00028 namespace {
00029   struct DivOpInfo {
00030     bool SignedOp;
00031     Value *Dividend;
00032     Value *Divisor;
00033 
00034     DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
00035       : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
00036   };
00037 
00038   struct DivPhiNodes {
00039     PHINode *Quotient;
00040     PHINode *Remainder;
00041 
00042     DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
00043       : Quotient(InQuotient), Remainder(InRemainder) {}
00044   };
00045 }
00046 
00047 namespace llvm {
00048   template<>
00049   struct DenseMapInfo<DivOpInfo> {
00050     static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
00051       return Val1.SignedOp == Val2.SignedOp &&
00052              Val1.Dividend == Val2.Dividend &&
00053              Val1.Divisor == Val2.Divisor;
00054     }
00055 
00056     static DivOpInfo getEmptyKey() {
00057       return DivOpInfo(false, nullptr, nullptr);
00058     }
00059 
00060     static DivOpInfo getTombstoneKey() {
00061       return DivOpInfo(true, nullptr, nullptr);
00062     }
00063 
00064     static unsigned getHashValue(const DivOpInfo &Val) {
00065       return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
00066                         reinterpret_cast<uintptr_t>(Val.Divisor)) ^
00067                         (unsigned)Val.SignedOp;
00068     }
00069   };
00070 
00071   typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
00072 }
00073 
00074 // insertFastDiv - Substitutes the div/rem instruction with code that checks the
00075 // value of the operands and uses a shorter-faster div/rem instruction when
00076 // possible and the longer-slower div/rem instruction otherwise.
00077 static bool insertFastDiv(Instruction *I, IntegerType *BypassType,
00078                           bool UseDivOp, bool UseSignedOp,
00079                           DivCacheTy &PerBBDivCache) {
00080   Function *F = I->getParent()->getParent();
00081   // Get instruction operands
00082   Value *Dividend = I->getOperand(0);
00083   Value *Divisor = I->getOperand(1);
00084 
00085   if (isa<ConstantInt>(Divisor) ||
00086       (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) {
00087     // Operations with immediate values should have
00088     // been solved and replaced during compile time.
00089     return false;
00090   }
00091 
00092   // Basic Block is split before divide
00093   BasicBlock *MainBB = &*I->getParent();
00094   BasicBlock *SuccessorBB = MainBB->splitBasicBlock(I);
00095 
00096   // Add new basic block for slow divide operation
00097   BasicBlock *SlowBB =
00098       BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB);
00099   SlowBB->moveBefore(SuccessorBB);
00100   IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
00101   Value *SlowQuotientV;
00102   Value *SlowRemainderV;
00103   if (UseSignedOp) {
00104     SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
00105     SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
00106   } else {
00107     SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
00108     SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
00109   }
00110   SlowBuilder.CreateBr(SuccessorBB);
00111 
00112   // Add new basic block for fast divide operation
00113   BasicBlock *FastBB =
00114       BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB);
00115   FastBB->moveBefore(SlowBB);
00116   IRBuilder<> FastBuilder(FastBB, FastBB->begin());
00117   Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
00118                                                 BypassType);
00119   Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
00120                                                  BypassType);
00121 
00122   // udiv/urem because optimization only handles positive numbers
00123   Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV,
00124                                                       ShortDivisorV);
00125   Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
00126                                                   ShortDivisorV);
00127   Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
00128                                                 ShortQuotientV,
00129                                                 Dividend->getType());
00130   Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
00131                                                  ShortRemainderV,
00132                                                  Dividend->getType());
00133   FastBuilder.CreateBr(SuccessorBB);
00134 
00135   // Phi nodes for result of div and rem
00136   IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
00137   PHINode *QuoPhi = SuccessorBuilder.CreatePHI(I->getType(), 2);
00138   QuoPhi->addIncoming(SlowQuotientV, SlowBB);
00139   QuoPhi->addIncoming(FastQuotientV, FastBB);
00140   PHINode *RemPhi = SuccessorBuilder.CreatePHI(I->getType(), 2);
00141   RemPhi->addIncoming(SlowRemainderV, SlowBB);
00142   RemPhi->addIncoming(FastRemainderV, FastBB);
00143 
00144   // Replace I with appropriate phi node
00145   if (UseDivOp)
00146     I->replaceAllUsesWith(QuoPhi);
00147   else
00148     I->replaceAllUsesWith(RemPhi);
00149   I->eraseFromParent();
00150 
00151   // Combine operands into a single value with OR for value testing below
00152   MainBB->getInstList().back().eraseFromParent();
00153   IRBuilder<> MainBuilder(MainBB, MainBB->end());
00154   Value *OrV = MainBuilder.CreateOr(Dividend, Divisor);
00155 
00156   // BitMask is inverted to check if the operands are
00157   // larger than the bypass type
00158   uint64_t BitMask = ~BypassType->getBitMask();
00159   Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
00160 
00161   // Compare operand values and branch
00162   Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
00163   Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
00164   MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
00165 
00166   // Cache phi nodes to be used later in place of other instances
00167   // of div or rem with the same sign, dividend, and divisor
00168   DivOpInfo Key(UseSignedOp, Dividend, Divisor);
00169   DivPhiNodes Value(QuoPhi, RemPhi);
00170   PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
00171   return true;
00172 }
00173 
00174 // reuseOrInsertFastDiv - Reuses previously computed dividend or remainder from
00175 // the current BB if operands and operation are identical. Otherwise calls
00176 // insertFastDiv to perform the optimization and caches the resulting dividend
00177 // and remainder.
00178 static bool reuseOrInsertFastDiv(Instruction *I, IntegerType *BypassType,
00179                                  bool UseDivOp, bool UseSignedOp,
00180                                  DivCacheTy &PerBBDivCache) {
00181   // Get instruction operands
00182   DivOpInfo Key(UseSignedOp, I->getOperand(0), I->getOperand(1));
00183   DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
00184 
00185   if (CacheI == PerBBDivCache.end()) {
00186     // If previous instance does not exist, insert fast div
00187     return insertFastDiv(I, BypassType, UseDivOp, UseSignedOp, PerBBDivCache);
00188   }
00189 
00190   // Replace operation value with previously generated phi node
00191   DivPhiNodes &Value = CacheI->second;
00192   if (UseDivOp) {
00193     // Replace all uses of div instruction with quotient phi node
00194     I->replaceAllUsesWith(Value.Quotient);
00195   } else {
00196     // Replace all uses of rem instruction with remainder phi node
00197     I->replaceAllUsesWith(Value.Remainder);
00198   }
00199 
00200   // Remove redundant operation
00201   I->eraseFromParent();
00202   return true;
00203 }
00204 
00205 // bypassSlowDivision - This optimization identifies DIV instructions in a BB
00206 // that can be profitably bypassed and carried out with a shorter, faster
00207 // divide.
00208 bool llvm::bypassSlowDivision(
00209     BasicBlock *BB, const DenseMap<unsigned int, unsigned int> &BypassWidths) {
00210   DivCacheTy DivCache;
00211 
00212   bool MadeChange = false;
00213   Instruction* Next = &*BB->begin();
00214   while (Next != nullptr) {
00215     // We may add instructions immediately after I, but we want to skip over
00216     // them.
00217     Instruction* I = Next;
00218     Next = Next->getNextNode();
00219 
00220     // Get instruction details
00221     unsigned Opcode = I->getOpcode();
00222     bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
00223     bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
00224     bool UseSignedOp = Opcode == Instruction::SDiv ||
00225                        Opcode == Instruction::SRem;
00226 
00227     // Only optimize div or rem ops
00228     if (!UseDivOp && !UseRemOp)
00229       continue;
00230 
00231     // Skip division on vector types, only optimize integer instructions
00232     if (!I->getType()->isIntegerTy())
00233       continue;
00234 
00235     // Get bitwidth of div/rem instruction
00236     IntegerType *T = cast<IntegerType>(I->getType());
00237     unsigned int bitwidth = T->getBitWidth();
00238 
00239     // Continue if bitwidth is not bypassed
00240     DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth);
00241     if (BI == BypassWidths.end())
00242       continue;
00243 
00244     // Get type for div/rem instruction with bypass bitwidth
00245     IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
00246 
00247     MadeChange |= reuseOrInsertFastDiv(I, BT, UseDivOp, UseSignedOp, DivCache);
00248   }
00249 
00250   return MadeChange;
00251 }