LLVM 23.0.0git
UnifyLoopExits.cpp
Go to the documentation of this file.
1//===- UnifyLoopExits.cpp - Redirect exiting edges to one block -*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// For each natural loop with multiple exit blocks, this pass creates a new
10// block N such that all exiting blocks now branch to N, and then control flow
11// is redistributed to all the original exit blocks.
12//
13// Limitation: This assumes that all terminators in the CFG are direct branches
14// (the "br" instruction). The presence of any other control flow
15// such as indirectbr or switch will cause an assert.
16// The callbr terminator is supported by creating intermediate
17// target blocks that unconditionally branch to the original target
18// blocks. These intermediate target blocks can then be redirected
19// through the ControlFlowHub as usual.
20//
21//===----------------------------------------------------------------------===//
22
24#include "llvm/ADT/MapVector.h"
27#include "llvm/IR/Constants.h"
28#include "llvm/IR/Dominators.h"
34
35#define DEBUG_TYPE "unify-loop-exits"
36
37using namespace llvm;
38
40 "max-booleans-in-control-flow-hub", cl::init(32), cl::Hidden,
41 cl::desc("Set the maximum number of outgoing blocks for using a boolean "
42 "value to record the exiting block in the ControlFlowHub."));
43
44namespace {
45struct UnifyLoopExitsLegacyPass : public FunctionPass {
46 static char ID;
47 UnifyLoopExitsLegacyPass() : FunctionPass(ID) {
49 }
50
51 void getAnalysisUsage(AnalysisUsage &AU) const override {
52 AU.addRequired<LoopInfoWrapperPass>();
53 AU.addRequired<DominatorTreeWrapperPass>();
54 AU.addPreserved<LoopInfoWrapperPass>();
55 AU.addPreserved<DominatorTreeWrapperPass>();
56 }
57
58 bool runOnFunction(Function &F) override;
59};
60} // namespace
61
62char UnifyLoopExitsLegacyPass::ID = 0;
63
65 return new UnifyLoopExitsLegacyPass();
66}
67
68INITIALIZE_PASS_BEGIN(UnifyLoopExitsLegacyPass, "unify-loop-exits",
69 "Fixup each natural loop to have a single exit block",
70 false /* Only looks at CFG */, false /* Analysis Pass */)
73INITIALIZE_PASS_END(UnifyLoopExitsLegacyPass, "unify-loop-exits",
74 "Fixup each natural loop to have a single exit block",
75 false /* Only looks at CFG */, false /* Analysis Pass */)
76
77// The current transform introduces new control flow paths which may break the
78// SSA requirement that every def must dominate all its uses. For example,
79// consider a value D defined inside the loop that is used by some instruction
80// U outside the loop. It follows that D dominates U, since the original
81// program has valid SSA form. After merging the exits, all paths from D to U
82// now flow through the unified exit block. In addition, there may be other
83// paths that do not pass through D, but now reach the unified exit
84// block. Thus, D no longer dominates U.
85//
86// Restore the dominance by creating a phi for each such D at the new unified
87// loop exit. But when doing this, ignore any uses U that are in the new unified
88// loop exit, since those were introduced specially when the block was created.
89//
90// The use of SSAUpdater seems like overkill for this operation. The location
91// for creating the new PHI is well-known, and also the set of incoming blocks
92// to the new PHI.
95 BasicBlock *LoopExitBlock) {
96 using InstVector = SmallVector<Instruction *, 8>;
98 IIMap ExternalUsers;
99 for (auto *BB : L->blocks()) {
100 for (auto &I : *BB) {
101 for (auto &U : I.uses()) {
102 auto UserInst = cast<Instruction>(U.getUser());
103 auto UserBlock = UserInst->getParent();
104 if (UserBlock == LoopExitBlock)
105 continue;
106 if (L->contains(UserBlock))
107 continue;
108 LLVM_DEBUG(dbgs() << "added ext use for " << I.getName() << "("
109 << BB->getName() << ")"
110 << ": " << UserInst->getName() << "("
111 << UserBlock->getName() << ")"
112 << "\n");
113 ExternalUsers[&I].push_back(UserInst);
114 }
115 }
116 }
117
118 for (const auto &II : ExternalUsers) {
119 // For each Def used outside the loop, create NewPhi in
120 // LoopExitBlock. NewPhi receives Def only along exiting blocks that
121 // dominate it, while the remaining values are undefined since those paths
122 // didn't exist in the original CFG.
123 auto Def = II.first;
124 LLVM_DEBUG(dbgs() << "externally used: " << Def->getName() << "\n");
125 auto NewPhi =
126 PHINode::Create(Def->getType(), Incoming.size(),
127 Def->getName() + ".moved", LoopExitBlock->begin());
128 for (auto *In : Incoming) {
129 LLVM_DEBUG(dbgs() << "predecessor " << In->getName() << ": ");
130 if (Def->getParent() == In || DT.dominates(Def, In)) {
131 LLVM_DEBUG(dbgs() << "dominated\n");
132 NewPhi->addIncoming(Def, In);
133 } else {
134 LLVM_DEBUG(dbgs() << "not dominated\n");
135 NewPhi->addIncoming(PoisonValue::get(Def->getType()), In);
136 }
137 }
138
139 LLVM_DEBUG(dbgs() << "external users:");
140 for (auto *U : II.second) {
141 LLVM_DEBUG(dbgs() << " " << U->getName());
142 U->replaceUsesOfWith(Def, NewPhi);
143 }
144 LLVM_DEBUG(dbgs() << "\n");
145 }
146}
147
148static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
149 // To unify the loop exits, we need a list of the exiting blocks as
150 // well as exit blocks. The functions for locating these lists both
151 // traverse the entire loop body. It is more efficient to first
152 // locate the exiting blocks and then examine their successors to
153 // locate the exit blocks.
154 SmallVector<BasicBlock *, 8> ExitingBlocks;
155 L->getExitingBlocks(ExitingBlocks);
156
157 // No exit blocks, so nothing to do. Just return.
158 if (ExitingBlocks.empty())
159 return false;
160
161 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
162 SmallVector<BasicBlock *, 8> CallBrTargetBlocksToFix;
163
164 // Redirect exiting edges through a control flow hub.
165 ControlFlowHub CHub;
166 bool Changed = false;
167
168 for (unsigned I = 0; I < ExitingBlocks.size(); ++I) {
169 BasicBlock *BB = ExitingBlocks[I];
171 BasicBlock *Succ0 = Branch->getSuccessor(0);
172 Succ0 = L->contains(Succ0) ? nullptr : Succ0;
173 CHub.addBranch(BB, Succ0);
174
175 LLVM_DEBUG(dbgs() << "Added extiting branch: " << printBasicBlock(BB)
176 << " -> " << printBasicBlock(Succ0) << '\n');
177 } else if (CondBrInst *Branch = dyn_cast<CondBrInst>(BB->getTerminator())) {
178 BasicBlock *Succ0 = Branch->getSuccessor(0);
179 Succ0 = L->contains(Succ0) ? nullptr : Succ0;
180
181 BasicBlock *Succ1 = Branch->getSuccessor(1);
182 Succ1 = L->contains(Succ1) ? nullptr : Succ1;
183 CHub.addBranch(BB, Succ0, Succ1);
184
185 LLVM_DEBUG(dbgs() << "Added extiting branch: " << printBasicBlock(BB)
186 << " -> " << printBasicBlock(Succ0)
187 << (Succ0 && Succ1 ? " " : "") << printBasicBlock(Succ1)
188 << '\n');
189 } else if (CallBrInst *CallBr = dyn_cast<CallBrInst>(BB->getTerminator())) {
190 for (unsigned J = 0; J < CallBr->getNumSuccessors(); ++J) {
191 BasicBlock *Succ = CallBr->getSuccessor(J);
192 if (L->contains(Succ))
193 continue;
194 bool UpdatedLI = false;
195 BasicBlock *NewSucc =
196 SplitCallBrEdge(BB, Succ, J, &DTU, nullptr, &LI, &UpdatedLI);
197 // SplitCallBrEdge modifies the CFG because it creates an intermediate
198 // block. So we need to set the changed flag no matter what the
199 // ControlFlowHub is going to do later.
200 Changed = true;
201 // Even if CallBr and Succ do not have a common parent loop, we need to
202 // add the new target block to the parent loop of the current loop.
203 if (!UpdatedLI)
204 CallBrTargetBlocksToFix.push_back(NewSucc);
205 // ExitingBlocks is later used to restore SSA, so we need to make sure
206 // that the blocks used for phi nodes in the guard blocks match the
207 // predecessors of the guard blocks, which, in the case of callbr, are
208 // the new intermediate target blocks instead of the callbr blocks
209 // themselves.
210 ExitingBlocks[I] = NewSucc;
211 CHub.addBranch(NewSucc, Succ);
212 LLVM_DEBUG(dbgs() << "Added exiting branch: "
213 << printBasicBlock(NewSucc) << " -> "
214 << printBasicBlock(Succ) << '\n');
215 }
216 } else {
217 llvm_unreachable("unsupported block terminator");
218 }
219 }
220
222 BasicBlock *LoopExitBlock;
223 bool ChangedCFG;
224 std::tie(LoopExitBlock, ChangedCFG) = CHub.finalize(
225 &DTU, GuardBlocks, "loop.exit", MaxBooleansInControlFlowHub.getValue());
226 ChangedCFG |= Changed;
227 if (!ChangedCFG)
228 return false;
229
230 restoreSSA(DT, L, ExitingBlocks, LoopExitBlock);
231
232#if defined(EXPENSIVE_CHECKS)
233 assert(DT.verify(DominatorTree::VerificationLevel::Full));
234#else
235 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
236#endif // EXPENSIVE_CHECKS
237 L->verifyLoop();
238
239 // The guard blocks were created outside the loop, so they need to become
240 // members of the parent loop.
241 // Same goes for the callbr target blocks. Although we try to add them to the
242 // smallest common parent loop of the callbr block and the corresponding
243 // original target block, there might not have been such a loop, in which case
244 // the newly created callbr target blocks are not part of any loop. For nested
245 // loops, this might result in them leading to a loop with multiple entry
246 // points.
247 if (auto *ParentLoop = L->getParentLoop()) {
248 for (auto *G : GuardBlocks) {
249 ParentLoop->addBasicBlockToLoop(G, LI);
250 }
251 for (auto *C : CallBrTargetBlocksToFix) {
252 ParentLoop->addBasicBlockToLoop(C, LI);
253 }
254 ParentLoop->verifyLoop();
255 }
256
257#if defined(EXPENSIVE_CHECKS)
258 LI.verify(DT);
259#endif // EXPENSIVE_CHECKS
260
261 return true;
262}
263
264static bool runImpl(LoopInfo &LI, DominatorTree &DT) {
265
266 bool Changed = false;
267 auto Loops = LI.getLoopsInPreorder();
268 for (auto *L : Loops) {
269 LLVM_DEBUG(dbgs() << "Processing loop:\n"; L->print(dbgs()));
270 Changed |= unifyLoopExits(DT, LI, L);
271 }
272 return Changed;
273}
274
275bool UnifyLoopExitsLegacyPass::runOnFunction(Function &F) {
276 LLVM_DEBUG(dbgs() << "===== Unifying loop exits in function " << F.getName()
277 << "\n");
278 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
279 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
280
281 return runImpl(LI, DT);
282}
283
284namespace llvm {
285
288 LLVM_DEBUG(dbgs() << "===== Unifying loop exits in function " << F.getName()
289 << "\n");
290 auto &LI = AM.getResult<LoopAnalysis>(F);
291 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
292
293 if (!runImpl(LI, DT))
294 return PreservedAnalyses::all();
298 return PA;
299}
300} // namespace llvm
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool runOnFunction(Function &F, bool PostInlining)
static bool runImpl(Function &F, const TargetLowering &TLI, const LibcallLoweringInfo &Libcalls, AssumptionCache *AC)
Hexagon Hardware Loops
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
This file implements a map that provides insertion order iteration.
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
#define LLVM_DEBUG(...)
Definition Debug.h:114
static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L)
unify loop Fixup each natural loop to have a single exit static false void restoreSSA(const DominatorTree &DT, const Loop *L, SmallVectorImpl< BasicBlock * > &Incoming, BasicBlock *LoopExitBlock)
static cl::opt< unsigned > MaxBooleansInControlFlowHub("max-booleans-in-control-flow-hub", cl::init(32), cl::Hidden, cl::desc("Set the maximum number of outgoing blocks for using a boolean " "value to record the exiting block in the ControlFlowHub."))
static bool runImpl(LoopInfo &LI, DominatorTree &DT)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
CallBr instruction, tracking function calls that may not return control but instead transfer it to a ...
Conditional Branch instruction.
Analysis pass which computes a DominatorTree.
Definition Dominators.h:278
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:316
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:569
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
SmallVector< LoopT *, 4 > getLoopsInPreorder() const
Return all of the loops in the function in preorder across the loop nests, with siblings in forward p...
The legacy pass manager's analysis pass to compute loop information.
Definition LoopInfo.h:596
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:36
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Unconditional Branch instruction.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
initializer< Ty > init(const Ty &Val)
NodeAddr< DefNode * > Def
Definition RDFGraph.h:384
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void initializeUnifyLoopExitsLegacyPassPass(PassRegistry &)
LLVM_ABI BasicBlock * SplitCallBrEdge(BasicBlock *CallBrBlock, BasicBlock *Succ, unsigned SuccIdx, DomTreeUpdater *DTU=nullptr, CycleInfo *CI=nullptr, LoopInfo *LI=nullptr, bool *UpdatedLI=nullptr)
Create a new intermediate target block for a callbr edge.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI FunctionPass * createUnifyLoopExitsPass()
LLVM_ABI Printable printBasicBlock(const BasicBlock *BB)
Print BasicBlock BB as an operand or print "<nullptr>" if BB is a nullptr.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
Given a set of branch descriptors [BB, Succ0, Succ1], create a "hub" such that the control flow from ...
void addBranch(BasicBlock *BB, BasicBlock *Succ0, BasicBlock *Succ1=nullptr)
std::pair< BasicBlock *, bool > finalize(DomTreeUpdater *DTU, SmallVectorImpl< BasicBlock * > &GuardBlocks, const StringRef Prefix, std::optional< unsigned > MaxControlFlowBooleans=std::nullopt)
Return the unified loop exit block and a flag indicating if the CFG was changed at all.
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...