39#define DEBUG_TYPE "bypass-slow-division"
47 QuotRemPair(
Value *InQuotient,
Value *InRemainder)
48 : Quotient(InQuotient), Remainder(InRemainder) {}
54 struct QuotRemWithBB {
56 Value *Quotient =
nullptr;
57 Value *Remainder =
nullptr;
74class FastDivInsertionTask {
75 bool IsValidTask =
false;
80 bool isHashLikeValue(
Value *V, VisitedSetTy &Visited);
81 ValueRange getValueRange(
Value *
Op, VisitedSetTy &Visited);
84 QuotRemPair createDivRemPhiNodes(QuotRemWithBB &
LHS, QuotRemWithBB &
RHS,
87 std::optional<QuotRemPair> insertFastDivAndRem();
90 return SlowDivOrRem->
getOpcode() == Instruction::SDiv ||
91 SlowDivOrRem->
getOpcode() == Instruction::SRem;
95 return SlowDivOrRem->
getOpcode() == Instruction::SDiv ||
96 SlowDivOrRem->
getOpcode() == Instruction::UDiv;
99 Type *getSlowType() {
return SlowDivOrRem->
getType(); }
102 FastDivInsertionTask(
Instruction *
I,
const BypassWidthsTy &BypassWidths);
104 Value *getReplacement(DivCacheTy &Cache);
109FastDivInsertionTask::FastDivInsertionTask(
Instruction *
I,
110 const BypassWidthsTy &BypassWidths) {
111 switch (
I->getOpcode()) {
112 case Instruction::UDiv:
113 case Instruction::SDiv:
114 case Instruction::URem:
115 case Instruction::SRem:
124 IntegerType *SlowType = dyn_cast<IntegerType>(SlowDivOrRem->getType());
129 auto BI = BypassWidths.find(SlowType->
getBitWidth());
130 if (BI == BypassWidths.end())
138 MainBB =
I->getParent();
148Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {
154 Value *Dividend = SlowDivOrRem->getOperand(0);
155 Value *Divisor = SlowDivOrRem->getOperand(1);
157 auto CacheI = Cache.find(Key);
159 if (CacheI == Cache.end()) {
161 std::optional<QuotRemPair> OptResult = insertFastDivAndRem();
165 CacheI = Cache.insert({
Key, *OptResult}).first;
168 QuotRemPair &
Value = CacheI->second;
169 return isDivisionOp() ?
Value.Quotient :
Value.Remainder;
187bool FastDivInsertionTask::isHashLikeValue(
Value *V, VisitedSetTy &Visited) {
192 switch (
I->getOpcode()) {
193 case Instruction::Xor:
195 case Instruction::Mul: {
200 Value *Op1 =
I->getOperand(1);
202 if (!
C && isa<BitCastInst>(Op1))
203 C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0));
204 return C &&
C->getValue().getSignificantBits() > BypassType->getBitWidth();
206 case Instruction::PHI:
209 if (Visited.size() >= 16)
213 if (!Visited.insert(
I).second)
218 return getValueRange(V, Visited) == VALRNG_LIKELY_LONG ||
227ValueRange FastDivInsertionTask::getValueRange(
Value *V,
228 VisitedSetTy &Visited) {
229 unsigned ShortLen = BypassType->getBitWidth();
230 unsigned LongLen =
V->getType()->getIntegerBitWidth();
232 assert(LongLen > ShortLen &&
"Value type must be wider than BypassType");
233 unsigned HiBits = LongLen - ShortLen;
240 if (Known.countMinLeadingZeros() >= HiBits)
241 return VALRNG_KNOWN_SHORT;
243 if (Known.countMaxLeadingZeros() < HiBits)
244 return VALRNG_LIKELY_LONG;
250 if (isHashLikeValue(V, Visited))
251 return VALRNG_LIKELY_LONG;
253 return VALRNG_UNKNOWN;
258QuotRemWithBB FastDivInsertionTask::createSlowBB(
BasicBlock *SuccessorBB) {
259 QuotRemWithBB DivRemPair;
261 MainBB->getParent(), SuccessorBB);
262 IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
263 Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
265 Value *Dividend = SlowDivOrRem->getOperand(0);
266 Value *Divisor = SlowDivOrRem->getOperand(1);
269 DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor);
270 DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor);
272 DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor);
273 DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor);
276 Builder.CreateBr(SuccessorBB);
282QuotRemWithBB FastDivInsertionTask::createFastBB(
BasicBlock *SuccessorBB) {
283 QuotRemWithBB DivRemPair;
285 MainBB->getParent(), SuccessorBB);
286 IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
287 Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
289 Value *Dividend = SlowDivOrRem->getOperand(0);
290 Value *Divisor = SlowDivOrRem->getOperand(1);
291 Value *ShortDivisorV =
292 Builder.CreateCast(Instruction::Trunc, Divisor, BypassType);
293 Value *ShortDividendV =
294 Builder.CreateCast(Instruction::Trunc, Dividend, BypassType);
297 Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV);
298 Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV);
299 DivRemPair.Quotient =
300 Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType());
301 DivRemPair.Remainder =
302 Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType());
303 Builder.CreateBr(SuccessorBB);
309QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS,
313 Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
314 PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2);
317 PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2);
320 return QuotRemPair(QuoPhi, RemPhi);
327Value *FastDivInsertionTask::insertOperandRuntimeCheck(
Value *Op1,
Value *Op2) {
328 assert((Op1 || Op2) &&
"Nothing to check");
330 Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
334 OrV = Builder.CreateOr(Op1, Op2);
336 OrV = Op1 ? Op1 : Op2;
340 uint64_t BitMask = ~BypassType->getBitMask();
341 Value *AndV = Builder.CreateAnd(OrV, BitMask);
345 return Builder.CreateICmpEQ(AndV, ZeroV);
350std::optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
351 Value *Dividend = SlowDivOrRem->getOperand(0);
352 Value *Divisor = SlowDivOrRem->getOperand(1);
355 ValueRange DividendRange = getValueRange(Dividend, SetL);
356 if (DividendRange == VALRNG_LIKELY_LONG)
360 ValueRange DivisorRange = getValueRange(Divisor, SetR);
361 if (DivisorRange == VALRNG_LIKELY_LONG)
364 bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT);
365 bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT);
367 if (DividendShort && DivisorShort) {
374 Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType);
375 Value *TruncDivisor = Builder.CreateTrunc(Divisor, BypassType);
376 Value *TruncDiv = Builder.CreateUDiv(TruncDividend, TruncDivisor);
377 Value *TruncRem = Builder.CreateURem(TruncDividend, TruncDivisor);
378 Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType());
379 Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType());
380 return QuotRemPair(ExtDiv, ExtRem);
383 if (isa<ConstantInt>(Divisor)) {
394 if (
auto *BCI = dyn_cast<BitCastInst>(Divisor))
395 if (BCI->getParent() == SlowDivOrRem->getParent() &&
396 isa<ConstantInt>(BCI->getOperand(0)))
400 Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
420 Long.Quotient = ConstantInt::get(getSlowType(), 0);
421 Long.Remainder = Dividend;
422 QuotRemWithBB
Fast = createFastBB(SuccessorBB);
423 QuotRemPair
Result = createDivRemPhiNodes(
Fast,
Long, SuccessorBB);
424 Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor);
425 Builder.CreateCondBr(CmpV,
Fast.BB, SuccessorBB);
435 QuotRemWithBB
Fast = createFastBB(SuccessorBB);
436 QuotRemWithBB Slow = createSlowBB(SuccessorBB);
437 QuotRemPair
Result = createDivRemPhiNodes(
Fast, Slow, SuccessorBB);
438 Value *CmpV = insertOperandRuntimeCheck(DividendShort ?
nullptr : Dividend,
439 DivisorShort ?
nullptr : Divisor);
440 Builder.CreateCondBr(CmpV,
Fast.BB, Slow.BB);
448 const BypassWidthsTy &BypassWidths) {
449 DivCacheTy PerBBDivCache;
451 bool MadeChange =
false;
453 while (Next !=
nullptr) {
463 FastDivInsertionTask Task(
I, BypassWidths);
464 if (
Value *Replacement = Task.getReplacement(PerBBDivCache)) {
465 I->replaceAllUsesWith(Replacement);
466 I->eraseFromParent();
474 for (
auto &KV : PerBBDivCache)
475 for (
Value *V : {KV.second.Quotient, KV.second.Remainder})
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static int isSignedOp(ISD::CondCode Opcode)
For an integer comparison, return 1 if the comparison is a signed operation and 2 if the result is an...
This file defines the SmallPtrSet class.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
const Instruction & back() const
This is the shared class of boolean and integer constants.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Class to represent integer types.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ C
The default llvm calling convention, compatible with C.
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
bool bypassSlowDivision(BasicBlock *BB, const DenseMap< unsigned int, unsigned int > &BypassWidth)
This optimization identifies DIV instructions in a BB that can be profitably bypassed and carried out...