40#define DEBUG_TYPE "bypass-slow-division"
48 QuotRemPair(
Value *InQuotient,
Value *InRemainder)
49 : Quotient(InQuotient), Remainder(InRemainder) {}
55 struct QuotRemWithBB {
57 Value *Quotient =
nullptr;
58 Value *Remainder =
nullptr;
75class FastDivInsertionTask {
76 bool IsValidTask =
false;
81 bool isHashLikeValue(
Value *V, VisitedSetTy &Visited);
82 ValueRange getValueRange(
Value *
Op, VisitedSetTy &Visited);
85 QuotRemPair createDivRemPhiNodes(QuotRemWithBB &
LHS, QuotRemWithBB &
RHS,
88 std::optional<QuotRemPair> insertFastDivAndRem();
91 return SlowDivOrRem->
getOpcode() == Instruction::SDiv ||
92 SlowDivOrRem->
getOpcode() == Instruction::SRem;
96 return SlowDivOrRem->
getOpcode() == Instruction::SDiv ||
97 SlowDivOrRem->
getOpcode() == Instruction::UDiv;
100 Type *getSlowType() {
return SlowDivOrRem->
getType(); }
103 FastDivInsertionTask(
Instruction *
I,
const BypassWidthsTy &BypassWidths);
105 Value *getReplacement(DivCacheTy &Cache);
110FastDivInsertionTask::FastDivInsertionTask(
Instruction *
I,
111 const BypassWidthsTy &BypassWidths) {
112 switch (
I->getOpcode()) {
113 case Instruction::UDiv:
114 case Instruction::SDiv:
115 case Instruction::URem:
116 case Instruction::SRem:
125 IntegerType *SlowType = dyn_cast<IntegerType>(SlowDivOrRem->getType());
130 auto BI = BypassWidths.find(SlowType->
getBitWidth());
131 if (BI == BypassWidths.end())
139 MainBB =
I->getParent();
149Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {
155 Value *Dividend = SlowDivOrRem->getOperand(0);
156 Value *Divisor = SlowDivOrRem->getOperand(1);
158 auto CacheI = Cache.find(Key);
160 if (CacheI == Cache.end()) {
162 std::optional<QuotRemPair> OptResult = insertFastDivAndRem();
166 CacheI = Cache.insert({
Key, *OptResult}).first;
169 QuotRemPair &
Value = CacheI->second;
170 return isDivisionOp() ?
Value.Quotient :
Value.Remainder;
188bool FastDivInsertionTask::isHashLikeValue(
Value *V, VisitedSetTy &Visited) {
193 switch (
I->getOpcode()) {
194 case Instruction::Xor:
196 case Instruction::Mul: {
201 Value *Op1 =
I->getOperand(1);
203 if (!
C && isa<BitCastInst>(Op1))
204 C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0));
205 return C &&
C->getValue().getSignificantBits() > BypassType->getBitWidth();
207 case Instruction::PHI:
210 if (Visited.size() >= 16)
214 if (!Visited.insert(
I).second)
219 return getValueRange(V, Visited) == VALRNG_LIKELY_LONG ||
228ValueRange FastDivInsertionTask::getValueRange(
Value *V,
229 VisitedSetTy &Visited) {
230 unsigned ShortLen = BypassType->getBitWidth();
231 unsigned LongLen =
V->getType()->getIntegerBitWidth();
233 assert(LongLen > ShortLen &&
"Value type must be wider than BypassType");
234 unsigned HiBits = LongLen - ShortLen;
241 if (Known.countMinLeadingZeros() >= HiBits)
242 return VALRNG_KNOWN_SHORT;
244 if (Known.countMaxLeadingZeros() < HiBits)
245 return VALRNG_LIKELY_LONG;
251 if (isHashLikeValue(V, Visited))
252 return VALRNG_LIKELY_LONG;
254 return VALRNG_UNKNOWN;
259QuotRemWithBB FastDivInsertionTask::createSlowBB(
BasicBlock *SuccessorBB) {
260 QuotRemWithBB DivRemPair;
262 MainBB->getParent(), SuccessorBB);
263 IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
264 Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
266 Value *Dividend = SlowDivOrRem->getOperand(0);
267 Value *Divisor = SlowDivOrRem->getOperand(1);
270 DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor);
271 DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor);
273 DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor);
274 DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor);
277 Builder.CreateBr(SuccessorBB);
283QuotRemWithBB FastDivInsertionTask::createFastBB(
BasicBlock *SuccessorBB) {
284 QuotRemWithBB DivRemPair;
286 MainBB->getParent(), SuccessorBB);
287 IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
288 Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
290 Value *Dividend = SlowDivOrRem->getOperand(0);
291 Value *Divisor = SlowDivOrRem->getOperand(1);
292 Value *ShortDivisorV =
293 Builder.CreateCast(Instruction::Trunc, Divisor, BypassType);
294 Value *ShortDividendV =
295 Builder.CreateCast(Instruction::Trunc, Dividend, BypassType);
298 Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV);
299 Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV);
300 DivRemPair.Quotient =
301 Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType());
302 DivRemPair.Remainder =
303 Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType());
304 Builder.CreateBr(SuccessorBB);
310QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS,
314 Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
315 PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2);
318 PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2);
321 return QuotRemPair(QuoPhi, RemPhi);
328Value *FastDivInsertionTask::insertOperandRuntimeCheck(
Value *Op1,
Value *Op2) {
329 assert((Op1 || Op2) &&
"Nothing to check");
331 Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
335 OrV = Builder.CreateOr(Op1, Op2);
337 OrV = Op1 ? Op1 : Op2;
341 uint64_t BitMask = ~BypassType->getBitMask();
342 Value *AndV = Builder.CreateAnd(OrV, BitMask);
346 return Builder.CreateICmpEQ(AndV, ZeroV);
351std::optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
352 Value *Dividend = SlowDivOrRem->getOperand(0);
353 Value *Divisor = SlowDivOrRem->getOperand(1);
356 ValueRange DividendRange = getValueRange(Dividend, SetL);
357 if (DividendRange == VALRNG_LIKELY_LONG)
361 ValueRange DivisorRange = getValueRange(Divisor, SetR);
362 if (DivisorRange == VALRNG_LIKELY_LONG)
365 bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT);
366 bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT);
368 if (DividendShort && DivisorShort) {
375 Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType);
376 Value *TruncDivisor = Builder.CreateTrunc(Divisor, BypassType);
377 Value *TruncDiv = Builder.CreateUDiv(TruncDividend, TruncDivisor);
378 Value *TruncRem = Builder.CreateURem(TruncDividend, TruncDivisor);
379 Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType());
380 Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType());
381 return QuotRemPair(ExtDiv, ExtRem);
384 if (isa<ConstantInt>(Divisor)) {
395 if (
auto *BCI = dyn_cast<BitCastInst>(Divisor))
396 if (BCI->getParent() == SlowDivOrRem->getParent() &&
397 isa<ConstantInt>(BCI->getOperand(0)))
401 Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
421 Long.Quotient = ConstantInt::get(getSlowType(), 0);
422 Long.Remainder = Dividend;
423 QuotRemWithBB
Fast = createFastBB(SuccessorBB);
424 QuotRemPair
Result = createDivRemPhiNodes(
Fast,
Long, SuccessorBB);
425 Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor);
426 Builder.CreateCondBr(CmpV,
Fast.BB, SuccessorBB);
436 QuotRemWithBB
Fast = createFastBB(SuccessorBB);
437 QuotRemWithBB Slow = createSlowBB(SuccessorBB);
438 QuotRemPair
Result = createDivRemPhiNodes(
Fast, Slow, SuccessorBB);
439 Value *CmpV = insertOperandRuntimeCheck(DividendShort ?
nullptr : Dividend,
440 DivisorShort ?
nullptr : Divisor);
441 Builder.CreateCondBr(CmpV,
Fast.BB, Slow.BB);
449 const BypassWidthsTy &BypassWidths) {
450 DivCacheTy PerBBDivCache;
452 bool MadeChange =
false;
454 while (Next !=
nullptr) {
464 FastDivInsertionTask Task(
I, BypassWidths);
465 if (
Value *Replacement = Task.getReplacement(PerBBDivCache)) {
466 I->replaceAllUsesWith(Replacement);
467 I->eraseFromParent();
475 for (
auto &KV : PerBBDivCache)
476 for (
Value *V : {KV.second.Quotient, KV.second.Remainder})
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
Module.h This file contains the declarations for the Module class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static int isSignedOp(ISD::CondCode Opcode)
For an integer comparison, return 1 if the comparison is a signed operation and 2 if the result is an...
This file defines the SmallPtrSet class.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
const Instruction & back() const
This is the shared class of boolean and integer constants.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Class to represent integer types.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ C
The default llvm calling convention, compatible with C.
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
bool bypassSlowDivision(BasicBlock *BB, const DenseMap< unsigned int, unsigned int > &BypassWidth)
This optimization identifies DIV instructions in a BB that can be profitably bypassed and carried out...