73 #define DEBUG_TYPE "loop-fusion"
76 STATISTIC(NumFusionCandidates,
"Number of candidates for loop fusion");
77 STATISTIC(InvalidPreheader,
"Loop has invalid preheader");
78 STATISTIC(InvalidHeader,
"Loop has invalid header");
79 STATISTIC(InvalidExitingBlock,
"Loop has invalid exiting blocks");
80 STATISTIC(InvalidExitBlock,
"Loop has invalid exit block");
81 STATISTIC(InvalidLatch,
"Loop has invalid latch");
82 STATISTIC(InvalidLoop,
"Loop is invalid");
83 STATISTIC(AddressTakenBB,
"Basic block has address taken");
84 STATISTIC(MayThrowException,
"Loop may throw an exception");
85 STATISTIC(ContainsVolatileAccess,
"Loop contains a volatile access");
86 STATISTIC(NotSimplifiedForm,
"Loop is not in simplified form");
87 STATISTIC(InvalidDependencies,
"Dependencies prevent fusion");
88 STATISTIC(UnknownTripCount,
"Loop has unknown trip count");
89 STATISTIC(UncomputableTripCount,
"SCEV cannot compute trip count of loop");
90 STATISTIC(NonEqualTripCount,
"Loop trip counts are not the same");
91 STATISTIC(NonAdjacent,
"Loops are not adjacent");
94 "Loop has a non-empty preheader with instructions that cannot be moved");
95 STATISTIC(FusionNotBeneficial,
"Fusion is not beneficial");
96 STATISTIC(NonIdenticalGuards,
"Candidates have different guards");
97 STATISTIC(NonEmptyExitBlock,
"Candidate has a non-empty exit block with "
98 "instructions that cannot be moved");
99 STATISTIC(NonEmptyGuardBlock,
"Candidate has a non-empty guard block with "
100 "instructions that cannot be moved");
101 STATISTIC(NotRotated,
"Candidate is not rotated");
103 "The second candidate is guarded while the first one is not");
112 "loop-fusion-dependence-analysis",
113 cl::desc(
"Which dependence analysis should loop fusion use?"),
115 "Use the scalar evolution interface"),
117 "Use the dependence analysis interface"),
119 "Use all available analyses")),
124 cl::desc(
"Max number of iterations to be peeled from a loop, such that "
125 "fusion can take place"));
130 cl::desc(
"Enable verbose debugging for Loop Fusion"),
145 struct FusionCandidate {
189 : Preheader(L->getLoopPreheader()), Header(L->getHeader()),
190 ExitingBlock(L->getExitingBlock()), ExitBlock(L->getExitBlock()),
191 Latch(L->getLoopLatch()), L(L), Valid(
true),
192 GuardBranch(L->getLoopGuardBranch()), PP(PP), AbleToPeel(
canPeel(L)),
193 Peeled(
false), DT(DT), PDT(PDT), ORE(ORE) {
200 if (
BB->hasAddressTaken()) {
213 if (
SI->isVolatile()) {
219 if (
LoadInst *LI = dyn_cast<LoadInst>(&
I)) {
220 if (LI->isVolatile()) {
226 if (
I.mayWriteToMemory())
227 MemWrites.push_back(&
I);
228 if (
I.mayReadFromMemory())
229 MemReads.push_back(&
I);
236 return Preheader && Header && ExitingBlock && ExitBlock && Latch && L &&
247 "Exiting Blocks is out of sync");
266 void updateAfterPeeling() {
283 assert(GuardBranch &&
"Only valid on guarded loops.");
285 "Expecting guard to be a conditional branch.");
293 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
295 dbgs() <<
"\tGuardBranch: ";
297 dbgs() << *GuardBranch;
301 << (GuardBranch ? GuardBranch->
getName() :
"nullptr") <<
"\n"
302 <<
"\tPreheader: " << (Preheader ? Preheader->
getName() :
"nullptr")
304 <<
"\tHeader: " << (Header ? Header->
getName() :
"nullptr") <<
"\n"
306 << (ExitingBlock ? ExitingBlock->
getName() :
"nullptr") <<
"\n"
307 <<
"\tExitBB: " << (ExitBlock ? ExitBlock->
getName() :
"nullptr")
309 <<
"\tLatch: " << (Latch ? Latch->
getName() :
"nullptr") <<
"\n"
311 << (getEntryBlock() ? getEntryBlock()->getName() :
"nullptr")
327 ++InvalidExitingBlock;
341 <<
" trip count not computable!\n");
347 <<
" is not in simplified form!\n");
374 assert(L && Preheader &&
"Fusion candidate not initialized properly!");
375 #if LLVM_ENABLE_STATS
380 <<
"Loop is not a candidate for fusion: " << Stat.getDesc());
386 struct FusionCandidateCompare {
391 bool operator()(
const FusionCandidate &
LHS,
392 const FusionCandidate &
RHS)
const {
400 assert(DT &&
LHS.PDT &&
"Expecting valid dominator tree");
403 if (DT->
dominates(RHSEntryBlock, LHSEntryBlock)) {
406 assert(
LHS.PDT->dominates(LHSEntryBlock, RHSEntryBlock));
410 if (DT->
dominates(LHSEntryBlock, RHSEntryBlock)) {
412 assert(
LHS.PDT->dominates(RHSEntryBlock, LHSEntryBlock));
420 "No dominance relationship between these fusion candidates!");
436 using FusionCandidateSet = std::set<FusionCandidate, FusionCandidateCompare>;
441 const FusionCandidate &
FC) {
443 OS <<
FC.Preheader->getName();
451 const FusionCandidateSet &CandSet) {
452 for (
const FusionCandidate &
FC : CandSet)
459 printFusionCandidates(
const FusionCandidateCollection &FusionCandidates) {
460 dbgs() <<
"Fusion Candidates: \n";
461 for (
const auto &CandidateSet : FusionCandidates) {
462 dbgs() <<
"*** Fusion Candidate Set ***\n";
463 dbgs() << CandidateSet;
464 dbgs() <<
"****************************\n";
475 struct LoopDepthTree {
477 using iterator = LoopsOnLevelTy::iterator;
478 using const_iterator = LoopsOnLevelTy::const_iterator;
482 LoopsOnLevel.emplace_back(LoopVector(LI.
rbegin(), LI.
rend()));
487 bool isRemovedLoop(
const Loop *L)
const {
return RemovedLoops.count(L); }
491 void removeLoop(
const Loop *L) { RemovedLoops.insert(L); }
495 LoopsOnLevelTy LoopsOnNextLevel;
497 for (
const LoopVector &LV : *
this)
499 if (!isRemovedLoop(L) && L->
begin() != L->
end())
500 LoopsOnNextLevel.emplace_back(LoopVector(L->
begin(), L->
end()));
502 LoopsOnLevel = LoopsOnNextLevel;
503 RemovedLoops.clear();
507 bool empty()
const {
return size() == 0; }
508 size_t size()
const {
return LoopsOnLevel.size() - RemovedLoops.size(); }
509 unsigned getDepth()
const {
return Depth; }
511 iterator
begin() {
return LoopsOnLevel.
begin(); }
512 iterator
end() {
return LoopsOnLevel.
end(); }
513 const_iterator
begin()
const {
return LoopsOnLevel.
begin(); }
514 const_iterator
end()
const {
return LoopsOnLevel.
end(); }
525 LoopsOnLevelTy LoopsOnLevel;
529 static void printLoopVector(
const LoopVector &LV) {
530 dbgs() <<
"****************************\n";
533 dbgs() <<
"****************************\n";
540 FusionCandidateCollection FusionCandidates;
560 : LDT(LI), DTU(DT, PDT,
DomTreeUpdater::UpdateStrategy::Lazy), LI(LI),
561 DT(DT), DI(DI), SE(SE), PDT(PDT), ORE(ORE), AC(AC),
TTI(
TTI) {}
573 LLVM_DEBUG(
dbgs() <<
"Performing Loop Fusion on function " <<
F.getName()
575 bool Changed =
false;
577 while (!LDT.empty()) {
578 LLVM_DEBUG(
dbgs() <<
"Got " << LDT.size() <<
" loop sets for depth "
579 << LDT.getDepth() <<
"\n";);
581 for (
const LoopVector &LV : LDT) {
582 assert(LV.size() > 0 &&
"Empty loop set was build!");
591 dbgs() <<
" Visit loop set (#" << LV.size() <<
"):\n";
597 collectFusionCandidates(LV);
598 Changed |= fuseCandidates();
609 FusionCandidates.clear();
634 const FusionCandidate &FC1)
const {
635 assert(FC0.Preheader && FC1.Preheader &&
"Expecting valid preheaders");
644 void collectFusionCandidates(
const LoopVector &LV) {
648 FusionCandidate CurrCand(L, DT, &PDT, ORE, PP);
649 if (!CurrCand.isEligibleForFusion(SE))
657 bool FoundSet =
false;
659 for (
auto &CurrCandSet : FusionCandidates) {
661 CurrCandSet.insert(CurrCand);
666 <<
" to existing candidate set\n");
677 FusionCandidateSet NewCandSet;
678 NewCandSet.insert(CurrCand);
679 FusionCandidates.push_back(NewCandSet);
681 NumFusionCandidates++;
690 bool isBeneficialFusion(
const FusionCandidate &FC0,
691 const FusionCandidate &FC1) {
703 std::pair<bool, Optional<unsigned>>
704 haveIdenticalTripCounts(
const FusionCandidate &FC0,
705 const FusionCandidate &FC1)
const {
708 if (isa<SCEVCouldNotCompute>(TripCount0)) {
709 UncomputableTripCount++;
710 LLVM_DEBUG(
dbgs() <<
"Trip count of first loop could not be computed!");
711 return {
false,
None};
715 if (isa<SCEVCouldNotCompute>(TripCount1)) {
716 UncomputableTripCount++;
717 LLVM_DEBUG(
dbgs() <<
"Trip count of second loop could not be computed!");
718 return {
false,
None};
722 << *TripCount1 <<
" are "
723 << (TripCount0 == TripCount1 ?
"identical" :
"different")
726 if (TripCount0 == TripCount1)
730 "determining the difference between trip counts\n");
739 if (TC0 == 0 || TC1 == 0) {
740 LLVM_DEBUG(
dbgs() <<
"Loop(s) do not have a single exit point or do not "
741 "have a constant number of iterations. Peeling "
742 "is not benefical\n");
743 return {
false,
None};
747 int Diff = TC0 - TC1;
753 dbgs() <<
"Difference is less than 0. FC1 (second loop) has more "
754 "iterations than the first one. Currently not supported\n");
757 LLVM_DEBUG(
dbgs() <<
"Difference in loop trip count is: " << Difference
760 return {
false, Difference};
763 void peelFusionCandidate(FusionCandidate &FC0,
const FusionCandidate &FC1,
764 unsigned PeelCount) {
765 assert(FC0.AbleToPeel &&
"Should be able to peel loop");
768 <<
" iterations of the first loop. \n");
770 FC0.Peeled =
peelLoop(FC0.L, PeelCount, &LI, &SE, DT, &AC,
true);
775 auto IdenticalTripCount = haveIdenticalTripCounts(FC0, FC1);
777 assert(IdenticalTripCount.first && *IdenticalTripCount.second == 0 &&
778 "Loops should have identical trip counts after peeling");
786 FC0.updateAfterPeeling();
797 FC0.GuardBranch ? FC0.ExitBlock->getUniqueSuccessor() : FC1.Preheader;
802 if (Pred != FC0.ExitBlock) {
811 BasicBlock *Succ = CurrentBranch->getSuccessor(0);
813 Succ = CurrentBranch->getSuccessor(1);
821 dbgs() <<
"Sucessfully peeled " << FC0.PP.PeelCount
822 <<
" iterations from the first loop.\n"
823 "Both Loops have the same number of iterations now.\n");
834 bool fuseCandidates() {
836 LLVM_DEBUG(printFusionCandidates(FusionCandidates));
837 for (
auto &CandidateSet : FusionCandidates) {
838 if (CandidateSet.size() < 2)
842 << CandidateSet <<
"\n");
844 for (
auto FC0 = CandidateSet.begin(); FC0 != CandidateSet.end(); ++FC0) {
845 assert(!LDT.isRemovedLoop(FC0->L) &&
846 "Should not have removed loops in CandidateSet!");
848 for (++FC1; FC1 != CandidateSet.end(); ++FC1) {
849 assert(!LDT.isRemovedLoop(FC1->L) &&
850 "Should not have removed loops in CandidateSet!");
852 LLVM_DEBUG(
dbgs() <<
"Attempting to fuse candidate \n"; FC0->dump();
853 dbgs() <<
" with\n"; FC1->dump();
dbgs() <<
"\n");
863 std::pair<bool, Optional<unsigned>> IdenticalTripCountRes =
864 haveIdenticalTripCounts(*FC0, *FC1);
865 bool SameTripCount = IdenticalTripCountRes.first;
870 if (FC0->AbleToPeel && !SameTripCount && TCDifference) {
873 <<
"Difference in loop trip counts: " << *TCDifference
874 <<
" is greater than maximum peel count specificed: "
879 SameTripCount =
true;
883 if (!SameTripCount) {
884 LLVM_DEBUG(
dbgs() <<
"Fusion candidates do not have identical trip "
885 "counts. Not fusing.\n");
886 reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
891 if (!isAdjacent(*FC0, *FC1)) {
893 <<
"Fusion candidates are not adjacent. Not fusing.\n");
894 reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1, NonAdjacent);
898 if (!FC0->GuardBranch && FC1->GuardBranch) {
900 "first one is not. Not fusing.\n");
901 reportLoopFusion<OptimizationRemarkMissed>(
902 *FC0, *FC1, OnlySecondCandidateIsGuarded);
908 if (FC0->GuardBranch && FC1->GuardBranch &&
909 !haveIdenticalGuards(*FC0, *FC1) && !TCDifference) {
911 "guards. Not Fusing.\n");
912 reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
918 *FC0->Preheader->getTerminator(), DT, &PDT,
921 "instructions in preheader. Not fusing.\n");
922 reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
927 if (FC0->GuardBranch) {
928 assert(FC1->GuardBranch &&
"Expecting valid FC1 guard branch");
931 *FC1->ExitBlock->getFirstNonPHIOrDbg(), DT,
934 "instructions in exit block. Not fusing.\n");
935 reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
941 *FC1->GuardBranch->getParent(),
942 *FC0->GuardBranch->getParent()->getTerminator(), DT, &PDT,
945 <<
"Fusion candidate contains unsafe "
946 "instructions in guard block. Not fusing.\n");
947 reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
955 if (!dependencesAllowFusion(*FC0, *FC1)) {
956 LLVM_DEBUG(
dbgs() <<
"Memory dependencies do not allow fusion!\n");
957 reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
958 InvalidDependencies);
962 bool BeneficialToFuse = isBeneficialFusion(*FC0, *FC1);
964 <<
"\tFusion appears to be "
965 << (BeneficialToFuse ?
"" :
"un") <<
"profitable!\n");
966 if (!BeneficialToFuse) {
967 reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
968 FusionNotBeneficial);
978 FusionCandidate FC0Copy = *FC0;
981 bool Peel = TCDifference && *TCDifference > 0;
983 peelFusionCandidate(FC0Copy, *FC1, *TCDifference);
989 reportLoopFusion<OptimizationRemark>((Peel ? FC0Copy : *FC0), *FC1,
992 FusionCandidate FusedCand(
993 performFusion((Peel ? FC0Copy : *FC0), *FC1), DT, &PDT, ORE,
996 assert(FusedCand.isEligibleForFusion(SE) &&
997 "Fused candidate should be eligible for fusion!");
1000 LDT.removeLoop(FC1->L);
1002 CandidateSet.erase(FC0);
1003 CandidateSet.erase(FC1);
1005 auto InsertPos = CandidateSet.insert(FusedCand);
1007 assert(InsertPos.second &&
1008 "Unable to insert TargetCandidate in CandidateSet!");
1013 FC0 = FC1 = InsertPos.first;
1015 LLVM_DEBUG(
dbgs() <<
"Candidate Set (after fusion): " << CandidateSet
1036 if (ExprL == &OldL) {
1041 if (OldL.contains(ExprL)) {
1043 if (!UseMax || !Pos || !Expr->
isAffine()) {
1055 bool wasValidSCEV()
const {
return Valid; }
1059 const Loop &OldL, &NewL;
1075 LLVM_DEBUG(
dbgs() <<
" Access function check: " << *SCEVPtr0 <<
" vs "
1076 << *SCEVPtr1 <<
"\n");
1078 AddRecLoopReplacer
Rewriter(SE, L0, L1);
1079 SCEVPtr0 =
Rewriter.visit(SCEVPtr0);
1082 LLVM_DEBUG(
dbgs() <<
" Access function after rewrite: " << *SCEVPtr0
1083 <<
" [Valid: " <<
Rewriter.wasValidSCEV() <<
"]\n");
1093 auto HasNonLinearDominanceRelation = [&](
const SCEV *
S) {
1109 << (IsAlwaysGE ?
" >= " :
" may < ") << *SCEVPtr1
1118 bool dependencesAllowFusion(
const FusionCandidate &FC0,
1125 << DepChoice <<
"\n");
1128 switch (DepChoice) {
1130 return accessDiffIsPositive(*FC0.L, *FC1.L, I0,
I1, AnyDep);
1132 auto DepResult = DI.
depends(&I0, &
I1,
true);
1138 dbgs() <<
" [#l: " << DepResult->getLevels() <<
"][Ordered: "
1139 << (DepResult->isOrdered() ?
"true" :
"false")
1141 LLVM_DEBUG(
dbgs() <<
"DepResult Levels: " << DepResult->getLevels()
1146 if (DepResult->getNextPredecessor() || DepResult->getNextSuccessor())
1148 dbgs() <<
"TODO: Implement pred/succ dependence handling!\n");
1155 return dependencesAllowFusion(FC0, FC1, I0,
I1, AnyDep,
1157 dependencesAllowFusion(FC0, FC1, I0,
I1, AnyDep,
1165 bool dependencesAllowFusion(
const FusionCandidate &FC0,
1166 const FusionCandidate &FC1) {
1167 LLVM_DEBUG(
dbgs() <<
"Check if " << FC0 <<
" can be fused with " << FC1
1169 assert(FC0.L->getLoopDepth() == FC1.L->getLoopDepth());
1174 if (!dependencesAllowFusion(FC0, FC1, *WriteL0, *WriteL1,
1177 InvalidDependencies++;
1181 if (!dependencesAllowFusion(FC0, FC1, *WriteL0, *ReadL1,
1184 InvalidDependencies++;
1191 if (!dependencesAllowFusion(FC0, FC1, *WriteL0, *WriteL1,
1194 InvalidDependencies++;
1198 if (!dependencesAllowFusion(FC0, FC1, *ReadL0, *WriteL1,
1201 InvalidDependencies++;
1210 for (
auto &
Op :
I.operands())
1212 if (FC0.L->contains(
Def->getParent())) {
1213 InvalidDependencies++;
1229 bool isAdjacent(
const FusionCandidate &FC0,
1230 const FusionCandidate &FC1)
const {
1232 if (FC0.GuardBranch)
1233 return FC0.getNonLoopBlock() == FC1.getEntryBlock();
1235 return FC0.ExitBlock == FC1.getEntryBlock();
1250 bool haveIdenticalGuards(
const FusionCandidate &FC0,
1251 const FusionCandidate &FC1)
const {
1252 assert(FC0.GuardBranch && FC1.GuardBranch &&
1253 "Expecting FC0 and FC1 to be guarded loops.");
1255 if (
auto FC0CmpInst =
1256 dyn_cast<Instruction>(FC0.GuardBranch->getCondition()))
1257 if (
auto FC1CmpInst =
1258 dyn_cast<Instruction>(FC1.GuardBranch->getCondition()))
1259 if (!FC0CmpInst->isIdenticalTo(FC1CmpInst))
1265 if (FC0.GuardBranch->getSuccessor(0) == FC0.Preheader)
1266 return (FC1.GuardBranch->getSuccessor(0) == FC1.Preheader);
1268 return (FC1.GuardBranch->getSuccessor(1) == FC1.Preheader);
1273 void simplifyLatchBranch(
const FusionCandidate &
FC)
const {
1274 BranchInst *FCLatchBranch = dyn_cast<BranchInst>(
FC.Latch->getTerminator());
1275 if (FCLatchBranch) {
1278 "Expecting the two successors of FCLatchBranch to be the same");
1287 void mergeLatch(
const FusionCandidate &FC0,
const FusionCandidate &FC1) {
1289 if (
BasicBlock *Succ = FC0.Latch->getUniqueSuccessor()) {
1324 Loop *performFusion(
const FusionCandidate &FC0,
const FusionCandidate &FC1) {
1325 assert(FC0.isValid() && FC1.isValid() &&
1326 "Expecting valid fusion candidates");
1329 dbgs() <<
"Fusion Candidate 1: \n"; FC1.dump(););
1338 if (FC0.GuardBranch)
1339 return fuseGuardedLoops(FC0, FC1);
1342 (FC0.Peeled ? FC0.ExitBlock->getUniqueSuccessor() : FC0.ExitBlock));
1343 assert(FC1.Preheader->size() == 1 &&
1344 FC1.Preheader->getSingleSuccessor() == FC1.Header);
1356 if (FC0.ExitingBlock != FC0.Latch)
1357 for (
PHINode &PHI : FC0.Header->phis())
1358 OriginalFC0PHIs.push_back(&PHI);
1361 FC1.Preheader->replaceSuccessorsPhiUsesWith(FC0.Preheader);
1362 FC0.Latch->replaceSuccessorsPhiUsesWith(FC1.Latch);
1385 FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC1.Preheader,
1396 FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC0.ExitBlock,
1400 FC0.ExitBlock->getTerminator()->eraseFromParent();
1408 FC1.Preheader->getTerminator()->eraseFromParent();
1414 while (
PHINode *PHI = dyn_cast<PHINode>(&FC1.Header->front())) {
1417 if (PHI->hasNUsesOrMore(1))
1418 PHI->moveBefore(&*FC0.Header->getFirstInsertionPt());
1420 PHI->eraseFromParent();
1428 for (
PHINode *LCPHI : OriginalFC0PHIs) {
1429 int L1LatchBBIdx = LCPHI->getBasicBlockIndex(FC1.Latch);
1430 assert(L1LatchBBIdx >= 0 &&
1431 "Expected loop carried value to be rewired at this point!");
1433 Value *LCV = LCPHI->getIncomingValue(L1LatchBBIdx);
1436 LCV->
getType(), 2, LCPHI->getName() +
".afterFC0", L1HeaderIP);
1441 LCPHI->setIncomingValue(L1LatchBBIdx, L1HeaderPHI);
1445 FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header);
1446 FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header);
1450 simplifyLatchBranch(FC0);
1454 if (FC0.Latch != FC0.ExitingBlock)
1459 FC0.Latch, FC0.Header));
1461 FC1.Latch, FC0.Header));
1463 FC1.Latch, FC1.Header));
1486 mergeLatch(FC0, FC1);
1491 FC0.L->addBlockEntry(
BB);
1492 FC1.L->removeBlockFromLoop(
BB);
1497 while (!FC1.L->isInnermost()) {
1498 const auto &ChildLoopIt = FC1.L->begin();
1499 Loop *ChildLoop = *ChildLoopIt;
1500 FC1.L->removeChildLoop(ChildLoopIt);
1501 FC0.L->addChildLoop(ChildLoop);
1532 template <
typename RemarkKind>
1533 void reportLoopFusion(
const FusionCandidate &FC0,
const FusionCandidate &FC1,
1535 assert(FC0.Preheader && FC1.Preheader &&
1536 "Expecting valid fusion candidates");
1537 using namespace ore;
1538 #if LLVM_ENABLE_STATS
1540 ORE.
emit(RemarkKind(
DEBUG_TYPE, Stat.getName(), FC0.L->getStartLoc(),
1542 <<
"[" << FC0.Preheader->getParent()->getName()
1543 <<
"]: " <<
NV(
"Cand1",
StringRef(FC0.Preheader->getName()))
1544 <<
" and " <<
NV(
"Cand2",
StringRef(FC1.Preheader->getName()))
1545 <<
": " << Stat.getDesc());
1564 Loop *fuseGuardedLoops(
const FusionCandidate &FC0,
1565 const FusionCandidate &FC1) {
1566 assert(FC0.GuardBranch && FC1.GuardBranch &&
"Expecting guarded loops");
1568 BasicBlock *FC0GuardBlock = FC0.GuardBranch->getParent();
1569 BasicBlock *FC1GuardBlock = FC1.GuardBranch->getParent();
1570 BasicBlock *FC0NonLoopBlock = FC0.getNonLoopBlock();
1571 BasicBlock *FC1NonLoopBlock = FC1.getNonLoopBlock();
1572 BasicBlock *FC0ExitBlockSuccessor = FC0.ExitBlock->getUniqueSuccessor();
1579 (FC0.Peeled ? *FC0ExitBlockSuccessor : *FC0.ExitBlock), *FC1.ExitBlock,
1586 assert(FC0NonLoopBlock == FC1GuardBlock &&
"Loops are not adjacent");
1599 FC0.GuardBranch->replaceUsesOfWith(FC0NonLoopBlock, FC1NonLoopBlock);
1601 BasicBlock *BBToUpdate = FC0.Peeled ? FC0ExitBlockSuccessor : FC0.ExitBlock;
1605 FC1.GuardBranch->eraseFromParent();
1623 FC0ExitBlockSuccessor);
1627 "Expecting guard block to have no predecessors");
1629 "Expecting guard block to have no successors");
1644 if (FC0.ExitingBlock != FC0.Latch)
1645 for (
PHINode &PHI : FC0.Header->phis())
1646 OriginalFC0PHIs.push_back(&PHI);
1648 assert(OriginalFC0PHIs.empty() &&
"Expecting OriginalFC0PHIs to be empty!");
1651 FC1.Preheader->replaceSuccessorsPhiUsesWith(FC0.Preheader);
1652 FC0.Latch->replaceSuccessorsPhiUsesWith(FC1.Latch);
1667 FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC0.ExitBlock,
1683 FC0.ExitBlock->getTerminator()->eraseFromParent();
1689 FC1.Preheader->getTerminator()->eraseFromParent();
1695 while (
PHINode *PHI = dyn_cast<PHINode>(&FC1.Header->front())) {
1698 if (PHI->hasNUsesOrMore(1))
1699 PHI->moveBefore(&*FC0.Header->getFirstInsertionPt());
1701 PHI->eraseFromParent();
1709 for (
PHINode *LCPHI : OriginalFC0PHIs) {
1710 int L1LatchBBIdx = LCPHI->getBasicBlockIndex(FC1.Latch);
1711 assert(L1LatchBBIdx >= 0 &&
1712 "Expected loop carried value to be rewired at this point!");
1714 Value *LCV = LCPHI->getIncomingValue(L1LatchBBIdx);
1717 LCV->
getType(), 2, LCPHI->getName() +
".afterFC0", L1HeaderIP);
1722 LCPHI->setIncomingValue(L1LatchBBIdx, L1HeaderPHI);
1728 FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header);
1729 FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header);
1733 simplifyLatchBranch(FC0);
1737 if (FC0.Latch != FC0.ExitingBlock)
1742 FC0.Latch, FC0.Header));
1744 FC1.Latch, FC0.Header));
1746 FC1.Latch, FC1.Header));
1762 DTU.
deleteBB(FC0ExitBlockSuccessor);
1778 mergeLatch(FC0, FC1);
1783 FC0.L->addBlockEntry(
BB);
1784 FC1.L->removeBlockFromLoop(
BB);
1789 while (!FC1.L->isInnermost()) {
1790 const auto &ChildLoopIt = FC1.L->begin();
1791 Loop *ChildLoop = *ChildLoopIt;
1792 FC1.L->removeChildLoop(ChildLoopIt);
1793 FC0.L->addChildLoop(ChildLoop);
1839 if (skipFunction(
F))
1841 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1842 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1843 auto &DI = getAnalysis<DependenceAnalysisWrapperPass>().getDI();
1844 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1845 auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
1846 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
1847 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F);
1849 getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
1852 LoopFuser LF(LI, DT, DI, SE, PDT, ORE,
DL, AC,
TTI);
1853 return LF.fuseLoops(
F);
1869 LoopFuser LF(LI, DT, DI, SE, PDT, ORE,
DL, AC,
TTI);
1870 bool Changed = LF.fuseLoops(
F);