Go to the documentation of this file.
122 cl::desc(
"If set to true, IRCE may eliminate wide range checks in loops "
123 "with narrow latch condition."));
127 #define DEBUG_TYPE "irce"
141 class InductiveRangeCheck {
143 const SCEV *Begin =
nullptr;
144 const SCEV *Step =
nullptr;
145 const SCEV *End =
nullptr;
146 Use *CheckUse =
nullptr;
158 const SCEV *getBegin()
const {
return Begin; }
159 const SCEV *getStep()
const {
return Step; }
160 const SCEV *getEnd()
const {
return End; }
163 OS <<
"InductiveRangeCheck:\n";
170 OS <<
"\n CheckUse: ";
171 getCheckUse()->getUser()->print(OS);
172 OS <<
" Operand: " << getCheckUse()->getOperandNo() <<
"\n";
180 Use *getCheckUse()
const {
return CheckUse; }
190 Range(
const SCEV *Begin,
const SCEV *End) : Begin(Begin), End(End) {
191 assert(Begin->
getType() == End->getType() &&
"ill-typed range!");
195 const SCEV *getBegin()
const {
return Begin; }
196 const SCEV *getEnd()
const {
return End; }
209 bool getPassingDirection() {
return true; }
216 bool IsLatchSigned)
const;
229 struct LoopStructure;
231 class InductiveRangeCheckElimination {
249 : SE(SE), BPI(BPI), DT(DT), LI(LI), GetBFI(GetBFI) {}
280 "Inductive range check elimination",
false,
false)
293 InductiveRangeCheck::parseRangeCheckICmp(
Loop *L,
ICmpInst *ICI,
295 Value *&Length,
bool &IsSigned) {
296 auto IsLoopInvariant = [&SE, L](
Value *V) {
297 return SE.isLoopInvariant(SE.getSCEV(V), L);
313 if (
match(
RHS, m_ConstantInt<0>())) {
324 if (
match(
RHS, m_ConstantInt<-1>())) {
329 if (IsLoopInvariant(
LHS)) {
341 if (IsLoopInvariant(
LHS)) {
352 void InductiveRangeCheck::extractRangeChecksFromCond(
356 Value *Condition = ConditionUse.
get();
357 if (!Visited.
insert(Condition).second)
362 extractRangeChecksFromCond(L, SE, cast<User>(Condition)->getOperandUse(0),
364 extractRangeChecksFromCond(L, SE, cast<User>(Condition)->getOperandUse(1),
369 ICmpInst *ICI = dyn_cast<ICmpInst>(Condition);
375 if (!parseRangeCheckICmp(L, ICI, SE,
Index, Length, IsSigned))
378 const auto *IndexAddRec = dyn_cast<SCEVAddRecExpr>(SE.
getSCEV(
Index));
380 IndexAddRec && (IndexAddRec->getLoop() == L) && IndexAddRec->isAffine();
385 const SCEV *End =
nullptr;
394 unsigned BitWidth = cast<IntegerType>(IndexAddRec->getType())->getBitWidth();
399 InductiveRangeCheck IRC;
401 IRC.Begin = IndexAddRec->getStart();
402 IRC.Step = IndexAddRec->getStepRecurrence(SE);
403 IRC.CheckUse = &ConditionUse;
404 Checks.push_back(IRC);
407 void InductiveRangeCheck::extractRangeChecksFromBranch(
420 InductiveRangeCheck::extractRangeChecksFromCond(L, SE, BI->
getOperandUse(0),
446 DisableLICMVersioning, DisableDistribution});
459 struct LoopStructure {
460 const char *
Tag =
"";
480 Value *IndVarBase =
nullptr;
481 Value *IndVarStart =
nullptr;
482 Value *IndVarStep =
nullptr;
483 Value *LoopExitAt =
nullptr;
484 bool IndVarIncreasing =
false;
485 bool IsSignedPredicate =
true;
487 LoopStructure() =
default;
489 template <
typename M> LoopStructure map(M Map)
const {
492 Result.Header = cast<BasicBlock>(
Map(Header));
493 Result.Latch = cast<BasicBlock>(
Map(Latch));
494 Result.LatchBr = cast<BranchInst>(
Map(LatchBr));
495 Result.LatchExit = cast<BasicBlock>(
Map(LatchExit));
496 Result.LatchBrExitIdx = LatchBrExitIdx;
501 Result.IndVarIncreasing = IndVarIncreasing;
502 Result.IsSignedPredicate = IsSignedPredicate;
518 class LoopConstrainer {
522 std::vector<BasicBlock *> Blocks;
528 LoopStructure Structure;
533 struct RewrittenRangeInfo {
536 std::vector<PHINode *> PHIValuesAtPseudoExit;
539 RewrittenRangeInfo() =
default;
563 void cloneLoop(ClonedLoop &CLResult,
const char *
Tag)
const;
567 Loop *createClonedLoopStructure(
Loop *Original,
Loop *Parent,
592 changeIterationSpaceEnd(
const LoopStructure &
LS,
BasicBlock *Preheader,
599 const char *
Tag)
const;
605 void rewriteIncomingValuesForPHIs(
606 LoopStructure &
LS,
BasicBlock *ContinuationBlockAndPreheader,
607 const LoopConstrainer::RewrittenRangeInfo &RRI)
const;
626 const SCEV *LatchTakenCount =
nullptr;
634 InductiveRangeCheck::Range Range;
638 LoopStructure MainLoopStructure;
645 :
F(*L.getHeader()->
getParent()), Ctx(L.getHeader()->getContext()),
646 SE(SE), DT(DT), LI(LI), LPMAddNewLoop(LPMAddNewLoop), OriginalLoop(L),
647 Range(
R), MainLoopStructure(
LS) {}
658 const SCEV *BoundSCEV,
const SCEV *Step,
660 unsigned LatchBrExitIdx,
677 LLVM_DEBUG(
dbgs() <<
"irce: LatchExitBrIdx: " << LatchBrExitIdx <<
"\n");
685 if (LatchBrExitIdx == 1)
688 assert(LatchBrExitIdx == 0 &&
689 "LatchBrExitIdx should be either 0 or 1");
692 unsigned BitWidth = cast<IntegerType>(BoundSCEV->
getType())->getBitWidth();
697 const SCEV *MinusOne =
708 const SCEV *BoundSCEV,
const SCEV *Step,
710 unsigned LatchBrExitIdx,
725 LLVM_DEBUG(
dbgs() <<
"irce: LatchExitBrIdx: " << LatchBrExitIdx <<
"\n");
733 if (LatchBrExitIdx == 1)
736 assert(LatchBrExitIdx == 0 &&
"LatchBrExitIdx should be 0 or 1");
738 const SCEV *StepMinusOne =
740 unsigned BitWidth = cast<IntegerType>(BoundSCEV->
getType())->getBitWidth();
752 const char *&FailureReason) {
754 FailureReason =
"loop not in LoopSimplify form";
759 assert(Latch &&
"Simplified loops only have one latch!");
762 FailureReason =
"loop has already been cloned";
767 FailureReason =
"no loop latch";
774 FailureReason =
"no preheader";
780 FailureReason =
"latch terminator not conditional branch";
784 unsigned LatchBrExitIdx = LatchBr->
getSuccessor(0) == Header ? 1 : 0;
788 FailureReason =
"latch terminator branch not conditional on integral icmp";
793 if (isa<SCEVCouldNotCompute>(LatchCount)) {
794 FailureReason =
"could not compute latch count";
807 if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
808 if (isa<SCEVAddRecExpr>(RightSCEV)) {
813 FailureReason =
"no add recurrences in the icmp";
822 IntegerType *Ty = cast<IntegerType>(AR->getType());
830 const SCEV *ExtendedStep =
847 const SCEVAddRecExpr *IndVarBase = cast<SCEVAddRecExpr>(LeftSCEV);
849 FailureReason =
"LHS in icmp not induction variable";
853 if (!isa<SCEVConstant>(StepRec)) {
854 FailureReason =
"LHS in icmp not induction variable";
859 if (ICI->
isEquality() && !HasNoSignedWrap(IndVarBase)) {
860 FailureReason =
"LHS in icmp needs nsw for equality predicates";
866 bool IsSignedPredicate;
872 const SCEV *FixedRightSCEV =
nullptr;
876 if (
auto *
I = dyn_cast<Instruction>(RightValue))
878 FixedRightSCEV = RightSCEV;
881 bool DecreasedRightValueByOne =
false;
882 if (StepCI->
isOne()) {
907 DecreasedRightValueByOne =
true;
912 DecreasedRightValueByOne =
true;
919 bool FoundExpectedPred =
920 (LTPred && LatchBrExitIdx == 1) || (GTPred && LatchBrExitIdx == 0);
922 if (!FoundExpectedPred) {
923 FailureReason =
"expected icmp slt semantically, found something else";
929 FailureReason =
"unsigned latch conditions are explicitly prohibited";
934 LatchBrExitIdx, &L, SE)) {
935 FailureReason =
"Unsafe loop bounds";
938 if (LatchBrExitIdx == 0) {
941 if (!DecreasedRightValueByOne)
945 assert(!DecreasedRightValueByOne &&
946 "Right value can be decreased only for LatchBrExitIdx == 0!");
949 bool IncreasedRightValueByOne =
false;
970 IncreasedRightValueByOne =
true;
974 IncreasedRightValueByOne =
true;
982 bool FoundExpectedPred =
983 (GTPred && LatchBrExitIdx == 1) || (LTPred && LatchBrExitIdx == 0);
985 if (!FoundExpectedPred) {
986 FailureReason =
"expected icmp sgt semantically, found something else";
994 FailureReason =
"unsigned latch conditions are explicitly prohibited";
999 LatchBrExitIdx, &L, SE)) {
1000 FailureReason =
"Unsafe bounds";
1004 if (LatchBrExitIdx == 0) {
1007 if (!IncreasedRightValueByOne)
1011 assert(!IncreasedRightValueByOne &&
1012 "Right value can be increased only for LatchBrExitIdx == 0!");
1019 "loop variant exit count doesn't make sense!");
1028 Expander.expandCodeFor(FixedRightSCEV, FixedRightSCEV->
getType(),
Ins);
1030 Value *IndVarStartV = Expander.expandCodeFor(IndVarStart, IndVarTy,
Ins);
1031 IndVarStartV->
setName(
"indvar.start");
1038 Result.LatchBr = LatchBr;
1039 Result.LatchExit = LatchExit;
1040 Result.LatchBrExitIdx = LatchBrExitIdx;
1041 Result.IndVarStart = IndVarStartV;
1042 Result.IndVarStep = StepCI;
1043 Result.IndVarBase = LeftValue;
1044 Result.IndVarIncreasing = IsIncreasing;
1045 Result.LoopExitAt = RightValue;
1046 Result.IsSignedPredicate = IsSignedPredicate;
1048 FailureReason =
nullptr;
1061 LoopConstrainer::calculateSubRanges(
bool IsSignedPredicate)
const {
1062 IntegerType *Ty = cast<IntegerType>(LatchTakenCount->getType());
1064 auto *RTy = cast<IntegerType>(Range.getType());
1072 LoopConstrainer::SubRanges
Result;
1078 RTy, SE, IsSignedPredicate);
1080 SE, IsSignedPredicate);
1082 bool Increasing = MainLoopStructure.IndVarIncreasing;
1088 const SCEV *Smallest =
nullptr, *Greatest =
nullptr, *GreatestSeen =
nullptr;
1114 GreatestSeen = Start;
1117 auto Clamp = [
this, Smallest, Greatest, IsSignedPredicate](
const SCEV *
S) {
1118 return IsSignedPredicate
1129 bool ProvablyNoPreloop =
1131 if (!ProvablyNoPreloop)
1134 bool ProvablyNoPostLoop =
1136 if (!ProvablyNoPostLoop)
1143 const char *
Tag)
const {
1146 Result.Blocks.push_back(Clone);
1151 assert(V &&
"null values not in domain!");
1152 auto It =
Result.Map.find(V);
1153 if (It ==
Result.Map.end())
1155 return static_cast<Value *
>(It->second);
1159 cast<BasicBlock>(GetClonedValue(OriginalLoop.getLoopLatch()));
1163 Result.Structure = MainLoopStructure.map(GetClonedValue);
1166 for (
unsigned i = 0,
e =
Result.Blocks.size();
i !=
e; ++
i) {
1168 BasicBlock *OriginalBB = OriginalLoop.getBlocks()[
i];
1170 assert(
Result.Map[OriginalBB] == ClonedBB &&
"invariant!");
1181 if (OriginalLoop.contains(SBB))
1185 Value *OldIncoming = PN.getIncomingValueForBlock(OriginalBB);
1186 PN.addIncoming(GetClonedValue(OldIncoming), ClonedBB);
1192 LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
1266 RewrittenRangeInfo RRI;
1268 BasicBlock *BBInsertLocation =
LS.Latch->getNextNode();
1270 &
F, BBInsertLocation);
1275 bool Increasing =
LS.IndVarIncreasing;
1276 bool IsSignedPredicate =
LS.IsSignedPredicate;
1279 auto *RangeTy = Range.getBegin()->getType();
1280 auto NoopOrExt = [&](
Value *V) {
1281 if (V->getType() == RangeTy)
1283 return IsSignedPredicate ?
B.CreateSExt(V, RangeTy,
"wide." + V->getName())
1284 :
B.CreateZExt(V, RangeTy,
"wide." + V->getName());
1288 Value *EnterLoopCond =
nullptr;
1293 Value *IndVarStart = NoopOrExt(
LS.IndVarStart);
1294 EnterLoopCond =
B.CreateICmp(Pred, IndVarStart, ExitSubloopAt);
1296 B.CreateCondBr(EnterLoopCond,
LS.Header, RRI.PseudoExit);
1299 LS.LatchBr->setSuccessor(
LS.LatchBrExitIdx, RRI.ExitSelector);
1300 B.SetInsertPoint(
LS.LatchBr);
1301 Value *IndVarBase = NoopOrExt(
LS.IndVarBase);
1302 Value *TakeBackedgeLoopCond =
B.CreateICmp(Pred, IndVarBase, ExitSubloopAt);
1304 Value *CondForBranch =
LS.LatchBrExitIdx == 1
1305 ? TakeBackedgeLoopCond
1306 :
B.CreateNot(TakeBackedgeLoopCond);
1308 LS.LatchBr->setCondition(CondForBranch);
1310 B.SetInsertPoint(RRI.ExitSelector);
1315 Value *LoopExitAt = NoopOrExt(
LS.LoopExitAt);
1316 Value *IterationsLeft =
B.CreateICmp(Pred, IndVarBase, LoopExitAt);
1317 B.CreateCondBr(IterationsLeft, RRI.PseudoExit,
LS.LatchExit);
1327 BranchToContinuation);
1329 NewPHI->
addIncoming(PN.getIncomingValueForBlock(Preheader), Preheader);
1332 RRI.PHIValuesAtPseudoExit.push_back(NewPHI);
1336 BranchToContinuation);
1337 RRI.IndVarEnd->addIncoming(IndVarStart, Preheader);
1338 RRI.IndVarEnd->addIncoming(IndVarBase, RRI.ExitSelector);
1342 LS.LatchExit->replacePhiUsesWith(
LS.Latch, RRI.ExitSelector);
1347 void LoopConstrainer::rewriteIncomingValuesForPHIs(
1349 const LoopConstrainer::RewrittenRangeInfo &RRI)
const {
1350 unsigned PHIIndex = 0;
1352 PN.setIncomingValueForBlock(ContinuationBlock,
1353 RRI.PHIValuesAtPseudoExit[PHIIndex++]);
1355 LS.IndVarStart = RRI.IndVarEnd;
1358 BasicBlock *LoopConstrainer::createPreheader(
const LoopStructure &
LS,
1360 const char *
Tag)
const {
1364 LS.Header->replacePhiUsesWith(OldPreheader, Preheader);
1378 Loop *LoopConstrainer::createClonedLoopStructure(
Loop *Original,
Loop *Parent,
1381 Loop &
New = *LI.AllocateLoop();
1385 LI.addTopLevelLoop(&New);
1386 LPMAddNewLoop(&New, IsSubloop);
1389 for (
auto *
BB : Original->
blocks())
1390 if (LI.getLoopFor(
BB) == Original)
1391 New.addBasicBlockToLoop(cast<BasicBlock>(VM[
BB]), LI);
1394 for (
Loop *SubLoop : *Original)
1395 createClonedLoopStructure(SubLoop, &New, VM,
true);
1402 LatchTakenCount = SE.
getExitCount(&OriginalLoop, MainLoopStructure.Latch);
1403 Preheader = OriginalLoop.getLoopPreheader();
1404 assert(!isa<SCEVCouldNotCompute>(LatchTakenCount) && Preheader !=
nullptr &&
1407 OriginalPreheader = Preheader;
1408 MainLoopPreheader = Preheader;
1410 bool IsSignedPredicate = MainLoopStructure.IsSignedPredicate;
1417 SubRanges SR = *MaybeSR;
1418 bool Increasing = MainLoopStructure.IndVarIncreasing;
1420 cast<IntegerType>(Range.getBegin()->getType());
1422 SCEVExpander Expander(SE,
F.getParent()->getDataLayout(),
"irce");
1423 Instruction *InsertPt = OriginalPreheader->getTerminator();
1428 ClonedLoop PreLoop, PostLoop;
1430 Increasing ? SR.LowLimit.has_value() : SR.HighLimit.has_value();
1431 bool NeedsPostLoop =
1432 Increasing ? SR.HighLimit.has_value() : SR.LowLimit.has_value();
1434 Value *ExitPreLoopAt =
nullptr;
1435 Value *ExitMainLoopAt =
nullptr;
1437 cast<SCEVConstant>(SE.
getConstant(IVTy, -1,
true ));
1440 const SCEV *ExitPreLoopAtSCEV =
nullptr;
1443 ExitPreLoopAtSCEV = *SR.LowLimit;
1446 ExitPreLoopAtSCEV = SE.
getAddExpr(*SR.HighLimit, MinusOneS);
1448 LLVM_DEBUG(
dbgs() <<
"irce: could not prove no-overflow when computing "
1449 <<
"preloop exit limit. HighLimit = "
1450 << *(*SR.HighLimit) <<
"\n");
1455 LLVM_DEBUG(
dbgs() <<
"irce: could not prove that it is safe to expand the"
1456 <<
" preloop exit limit " << *ExitPreLoopAtSCEV
1462 ExitPreLoopAt = Expander.expandCodeFor(ExitPreLoopAtSCEV, IVTy, InsertPt);
1463 ExitPreLoopAt->
setName(
"exit.preloop.at");
1466 if (NeedsPostLoop) {
1467 const SCEV *ExitMainLoopAtSCEV =
nullptr;
1470 ExitMainLoopAtSCEV = *SR.HighLimit;
1473 ExitMainLoopAtSCEV = SE.
getAddExpr(*SR.LowLimit, MinusOneS);
1475 LLVM_DEBUG(
dbgs() <<
"irce: could not prove no-overflow when computing "
1476 <<
"mainloop exit limit. LowLimit = "
1477 << *(*SR.LowLimit) <<
"\n");
1482 LLVM_DEBUG(
dbgs() <<
"irce: could not prove that it is safe to expand the"
1483 <<
" main loop exit limit " << *ExitMainLoopAtSCEV
1489 ExitMainLoopAt = Expander.expandCodeFor(ExitMainLoopAtSCEV, IVTy, InsertPt);
1490 ExitMainLoopAt->
setName(
"exit.mainloop.at");
1500 RewrittenRangeInfo PreLoopRRI;
1504 PreLoop.Structure.Header);
1507 createPreheader(MainLoopStructure, Preheader,
"mainloop");
1508 PreLoopRRI = changeIterationSpaceEnd(PreLoop.Structure, Preheader,
1509 ExitPreLoopAt, MainLoopPreheader);
1510 rewriteIncomingValuesForPHIs(MainLoopStructure, MainLoopPreheader,
1515 RewrittenRangeInfo PostLoopRRI;
1517 if (NeedsPostLoop) {
1519 createPreheader(PostLoop.Structure, Preheader,
"postloop");
1520 PostLoopRRI = changeIterationSpaceEnd(MainLoopStructure, MainLoopPreheader,
1521 ExitMainLoopAt, PostLoopPreheader);
1522 rewriteIncomingValuesForPHIs(PostLoop.Structure, PostLoopPreheader,
1527 MainLoopPreheader != Preheader ? MainLoopPreheader :
nullptr;
1528 BasicBlock *NewBlocks[] = {PostLoopPreheader, PreLoopRRI.PseudoExit,
1529 PreLoopRRI.ExitSelector, PostLoopRRI.PseudoExit,
1530 PostLoopRRI.ExitSelector, NewMainLoopPreheader};
1545 Loop *PreL =
nullptr, *PostL =
nullptr;
1546 if (!PreLoop.Blocks.empty()) {
1547 PreL = createClonedLoopStructure(&OriginalLoop,
1548 OriginalLoop.getParentLoop(), PreLoop.Map,
1552 if (!PostLoop.Blocks.empty()) {
1554 createClonedLoopStructure(&OriginalLoop, OriginalLoop.getParentLoop(),
1555 PostLoop.Map,
false);
1559 auto CanonicalizeLoop = [&] (
Loop *L,
bool IsOriginalLoop) {
1561 simplifyLoop(L, &DT, &LI, &SE,
nullptr,
nullptr,
true);
1564 if (!IsOriginalLoop)
1568 CanonicalizeLoop(PreL,
false);
1570 CanonicalizeLoop(PostL,
false);
1571 CanonicalizeLoop(&OriginalLoop,
true);
1580 InductiveRangeCheck::computeSafeIterationSpace(
1582 bool IsLatchSigned)
const {
1585 auto *IVType = cast<IntegerType>(IndVar->
getType());
1586 auto *RCType = cast<IntegerType>(getBegin()->
getType());
1587 if (IVType->getBitWidth() > RCType->getBitWidth())
1617 assert(!
B->isZero() &&
"Recurrence with zero step?");
1619 const SCEV *
C = getBegin();
1624 assert(!
D->getValue()->isZero() &&
"Recurrence with zero step?");
1625 unsigned BitWidth = RCType->getBitWidth();
1640 auto ClampedSubtract = [&](
const SCEV *
X,
const SCEV *
Y) {
1645 if (IsLatchSigned) {
1676 auto SCEVCheckNonNegative = [&](
const SCEV *
X) {
1697 const SCEV *REnd = getEnd();
1698 const SCEV *EndIsNonNegative = SCEVCheckNonNegative(REnd);
1700 const SCEV *Begin = SE.
getMulExpr(ClampedSubtract(Zero, M), EndIsNonNegative);
1701 const SCEV *End = SE.
getMulExpr(ClampedSubtract(REnd, M), EndIsNonNegative);
1702 return InductiveRangeCheck::Range(Begin, End);
1708 const InductiveRangeCheck::Range &
R2) {
1709 if (
R2.isEmpty(SE,
true))
1716 assert(!R1Value.isEmpty(SE,
true) &&
1717 "We should never have empty R1!");
1721 if (R1Value.getType() !=
R2.getType())
1728 auto Ret = InductiveRangeCheck::Range(NewBegin, NewEnd);
1729 if (
Ret.isEmpty(SE,
true))
1737 const InductiveRangeCheck::Range &
R2) {
1738 if (
R2.isEmpty(SE,
false))
1745 assert(!R1Value.isEmpty(SE,
false) &&
1746 "We should never have empty R1!");
1750 if (R1Value.getType() !=
R2.getType())
1757 auto Ret = InductiveRangeCheck::Range(NewBegin, NewEnd);
1758 if (
Ret.isEmpty(SE,
false))
1778 InductiveRangeCheckElimination IRCE(SE, &BPI, DT, LI, { getBFI });
1780 bool Changed =
false;
1782 bool CFGChanged =
false;
1783 for (
const auto &L : LI) {
1784 CFGChanged |=
simplifyLoop(L, &DT, &LI, &SE,
nullptr,
nullptr,
1788 Changed |= CFGChanged;
1799 auto LPMAddNewLoop = [&Worklist](
Loop *
NL,
bool IsSubloop) {
1804 while (!Worklist.
empty()) {
1806 if (IRCE.run(L, LPMAddNewLoop)) {
1822 if (skipFunction(
F))
1825 ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1827 getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
1828 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1829 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1830 InductiveRangeCheckElimination IRCE(SE, &BPI, DT, LI);
1832 bool Changed =
false;
1834 for (
const auto &L : LI) {
1835 Changed |=
simplifyLoop(L, &DT, &LI, &SE,
nullptr,
nullptr,
1842 auto LPMAddNewLoop = [&](
Loop *
NL,
bool IsSubloop) {
1847 while (!Worklist.
empty()) {
1849 Changed |= IRCE.run(L, LPMAddNewLoop);
1856 LoopStructure &
LS) {
1861 uint64_t hFreq =
BFI.getBlockFreq(
LS.Header).getFrequency();
1865 <<
"the estimated number of iterations basing on "
1866 "frequency info is " << (hFreq / phFreq) <<
"\n";);
1878 <<
"the exit probability is too big " << ExitProbability
1888 LLVM_DEBUG(
dbgs() <<
"irce: giving up constraining loop, too large\n");
1902 if (
BranchInst *TBI = dyn_cast<BranchInst>(BBI->getTerminator()))
1903 InductiveRangeCheck::extractRangeChecksFromBranch(TBI, L, SE, BPI,
1906 if (RangeChecks.empty())
1909 auto PrintRecognizedRangeChecks = [&](
raw_ostream &OS) {
1910 OS <<
"irce: looking at loop "; L->
print(OS);
1911 OS <<
"irce: loop has " << RangeChecks.size()
1912 <<
" inductive range checks: \n";
1913 for (InductiveRangeCheck &IRC : RangeChecks)
1920 PrintRecognizedRangeChecks(
errs());
1922 const char *FailureReason =
nullptr;
1924 LoopStructure::parseLoopStructure(SE, *L, FailureReason);
1925 if (!MaybeLoopStructure) {
1927 << FailureReason <<
"\n";);
1930 LoopStructure
LS = *MaybeLoopStructure;
1944 auto IntersectRange =
1948 for (InductiveRangeCheck &IRC : RangeChecks) {
1949 auto Result = IRC.computeSafeIterationSpace(SE, IndVar,
1950 LS.IsSignedPredicate);
1952 auto MaybeSafeIterRange =
1953 IntersectRange(SE, SafeIterRange,
Result.getValue());
1954 if (MaybeSafeIterRange) {
1956 !MaybeSafeIterRange.getValue().isEmpty(SE,
LS.IsSignedPredicate) &&
1957 "We should never return empty ranges!");
1958 RangeChecksToEliminate.push_back(IRC);
1959 SafeIterRange = MaybeSafeIterRange.
getValue();
1967 LoopConstrainer LC(*L, LI, LPMAddNewLoop,
LS, SE, DT,
1969 bool Changed = LC.run();
1972 auto PrintConstrainedLoopInfo = [L]() {
1973 dbgs() <<
"irce: in function ";
1975 dbgs() <<
"constrained ";
1982 PrintConstrainedLoopInfo();
1986 for (InductiveRangeCheck &IRC : RangeChecksToEliminate) {
1987 ConstantInt *FoldedRangeCheck = IRC.getPassingDirection()
1990 IRC.getCheckUse()->set(FoldedRangeCheck);
1998 return new IRCELegacyPass();
A set of analyses that are preserved following a run of a transformation pass.
Analysis pass that exposes the ScalarEvolution for a function.
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
bool isLoopEntryGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test whether entry to the loop is protected by a conditional between LHS and RHS.
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
This is an optimization pass for GlobalISel generic memory operations.
static IntegerType * getInt1Ty(LLVMContext &C)
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
const SCEV * getNoopOrZeroExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
static cl::opt< bool > AllowUnsignedLatchCondition("irce-allow-unsigned-latch", cl::Hidden, cl::init(true))
static Optional< InductiveRangeCheck::Range > IntersectSignedRange(ScalarEvolution &SE, const Optional< InductiveRangeCheck::Range > &R1, const InductiveRangeCheck::Range &R2)
A parsed version of the target data layout string in and methods for querying it.
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const Function * getParent() const
Return the enclosing method, or null if none.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
const SCEV * getStart() const
Represents a single loop in the control flow graph.
void invalidate(IRUnitT &IR, const PreservedAnalyses &PA)
Invalidate cached analyses for an IR unit.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
This class uses information about analyze scalars to rewrite expressions in canonical form.
const APInt & getValue() const
Return the constant as an APInt value reference.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
The main scalar evolution driver.
INITIALIZE_PASS_BEGIN(IRCELegacyPass, "irce", "Inductive range check elimination", false, false) INITIALIZE_PASS_END(IRCELegacyPass
void abandon()
Mark an analysis as abandoned.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
@ ICMP_SGT
signed greater than
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
@ LoopInvariant
The SCEV is loop-invariant.
The instances of the Type class are immutable: once they are created, they are never changed.
const_iterator end(StringRef path)
Get end iterator over path.
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
The legacy pass manager's analysis pass to compute loop information.
void initializeIRCELegacyPassPass(PassRegistry &)
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
static void DisableAllLoopOptsOnLoop(Loop &L)
auto successors(MachineBasicBlock *BB)
@ ICMP_SLE
signed less or equal
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
static StringRef getPredicateName(Predicate P)
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
LLVM Basic Block Representation.
static const char * ClonedLoopTag
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
Inductive range check elimination
const Use & getOperandUse(unsigned i) const
This is the shared class of boolean and integer constants.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
Return the function type for an intrinsic.
Analysis pass which computes BranchProbabilityInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static cl::opt< bool > PrintRangeChecks("irce-print-range-checks", cl::Hidden, cl::init(false))
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
bool match(Val *V, const Pattern &P)
Legacy analysis pass which computes BranchProbabilityInfo.
(vector float) vec_cmpeq(*A, *B) C
bool isKnownNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always negative in loop L.
@ ICMP_ULE
unsigned less or equal
const SCEV * getUMaxExpr(const SCEV *LHS, const SCEV *RHS)
void print(raw_ostream &OS) const
Print out the internal representation of this scalar to the specified stream.
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
bool cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, bool Signed)
Returns true if S is defined and never is equal to signed/unsigned min.
Analysis providing branch probability information.
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
Represent the analysis usage information of a pass.
LLVM_NODISCARD T pop_back_val()
Loop * cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, LPPassManager *LPM)
Recursively clone the specified loop and all of its children, mapping the blocks with the specified m...
iterator_range< block_iterator > blocks() const
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Class to represent integer types.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
Legacy analysis pass which computes a DominatorTree.
This class implements an extremely fast bulk output stream that can only output to a stream.
void setName(const Twine &Name)
Change the name of the value.
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Analysis pass which computes BlockFrequencyInfo.
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
bool isKnownNonNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always non-negative in loop L.
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
Value * getCondition() const
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
An efficient, type-erasing, non-owning reference to a callable.
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
This class represents an analyzed expression in the program.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
static cl::opt< bool > SkipProfitabilityChecks("irce-skip-profitability-checks", cl::Hidden, cl::init(false))
This instruction compares its operands according to the predicate given to the constructor.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
const SCEV * getSMaxExpr(const SCEV *LHS, const SCEV *RHS)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
A version of PriorityWorklist that selects small size optimized data structures for the vector and ma...
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
This is an important class for using LLVM in a threaded context.
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
initializer< Ty > init(const Ty &Val)
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
This class represents a constant integer value.
bool empty() const
Determine if the PriorityWorklist is empty or not.
static MDString * get(LLVMContext &Context, StringRef Str)
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static const SCEV * NoopOrExtend(const SCEV *S, Type *Ty, ScalarEvolution &SE, bool Signed)
If the type of S matches with Ty, return S.
constexpr const T & getValue() const &
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
bool cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, bool Signed)
Returns true if S is defined and never is equal to signed/unsigned max.
@ ICMP_UGE
unsigned greater or equal
const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
bool isUnconditional() const
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BranchProbability getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const
Get an edge's probability, relative to other out-edges of the Src.
Class for arbitrary precision integers.
@ ICMP_SLT
signed less than
std::error_code remove(const Twine &path, bool IgnoreNonExisting=true)
Remove path.
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
Pass * createInductiveRangeCheckEliminationPass()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
const SCEV * getConstant(ConstantInt *V)
@ ICMP_ULT
unsigned less than
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Type * getType() const
All values are typed, get the type of this value.
void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
static const Function * getParent(const Value *V)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
const SCEV * getUMinExpr(const SCEV *LHS, const SCEV *RHS, bool Sequential=false)
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
StringRef getName() const
Return a constant reference to the value's name.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
LLVMContext & getContext() const
Get the context in which this basic block lives.
static bool runOnFunction(Function &F, bool PostInlining)
static bool isSafeIncreasingBound(const SCEV *Start, const SCEV *BoundSCEV, const SCEV *Step, ICmpInst::Predicate Pred, unsigned LatchBrExitIdx, Loop *L, ScalarEvolution &SE)
Given a loop with an increasing induction variable, is it possible to safely calculate the bounds of ...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
const Loop * getLoop() const
static ConstantInt * getTrue(LLVMContext &Context)
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
This node represents a polynomial recurrence on the trip count of the specified loop.
constexpr unsigned BitWidth
BlockT * getHeader() const
static bool isProfitableToTransform(const Loop &L, const BranchInst *BI)
static cl::opt< bool > PrintChangedLoops("irce-print-changed-loops", cl::Hidden, cl::init(false))
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
@ ICMP_SGE
signed greater or equal
bool isAvailableAtLoopEntry(const SCEV *S, const Loop *L)
Determine if the SCEV can be evaluated at loop's entry.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Analysis pass which computes a DominatorTree.
Pass interface - Implemented by all 'passes'.
bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint, ScalarEvolution &SE)
Return true if the given expression is safe to expand in the sense that all materialized values are d...
LoopDisposition getLoopDisposition(const SCEV *S, const Loop *L)
Return the "disposition" of the given SCEV with respect to the given loop.
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
@ ICMP_UGT
unsigned greater than
static cl::opt< unsigned > MinRuntimeIterations("irce-min-runtime-iterations", cl::Hidden, cl::init(10))
const BasicBlock * getParent() const
static cl::opt< bool > AllowNarrowLatchCondition("irce-allow-narrow-latch", cl::Hidden, cl::init(true), cl::desc("If set to true, IRCE may eliminate wide range checks in loops " "with narrow latch condition."))
Predicate getPredicate() const
Return the predicate for this instruction.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Type * getType() const
Return the LLVM type of this SCEV expression.
A container for analyses that lazily runs them and caches their results.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
FunctionPass class - This class is used to implement most global optimizations.
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
void print(raw_ostream &OS, bool Verbose=false, bool PrintNested=true, unsigned Depth=0) const
Print loop with all the BBs inside it.
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
AnalysisUsage & addRequired()
Value * getOperand(unsigned i) const
Conditional or Unconditional Branch instruction.
bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
const SCEV * getSMinExpr(const SCEV *LHS, const SCEV *RHS)
static cl::opt< unsigned > LoopSizeCutoff("irce-loop-size-cutoff", cl::Hidden, cl::init(64))
LLVM Value Representation.
const SCEV * getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind=Exact)
Return the number of times the backedge executes before the given exit would be taken; if not exactly...
static bool isSafeDecreasingBound(const SCEV *Start, const SCEV *BoundSCEV, const SCEV *Step, ICmpInst::Predicate Pred, unsigned LatchBrExitIdx, Loop *L, ScalarEvolution &SE)
Given a loop with an deccreasing induction variable, is it possible to safely calculate the bounds of...
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ? R : false.
BasicBlock * getSuccessor(unsigned i) const
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Analysis pass that exposes the LoopInfo for a function.
A Use represents the edge between a Value definition and its users.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
static Optional< InductiveRangeCheck::Range > IntersectUnsignedRange(ScalarEvolution &SE, const Optional< InductiveRangeCheck::Range > &R1, const InductiveRangeCheck::Range &R2)