File: | build/source/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp |
Warning: | line 3361, column 21 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | ///===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===// | ||||||
2 | // | ||||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||
6 | // | ||||||
7 | //===----------------------------------------------------------------------===// | ||||||
8 | |||||||
9 | #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" | ||||||
10 | #include "llvm/ADT/DenseMap.h" | ||||||
11 | #include "llvm/ADT/STLExtras.h" | ||||||
12 | #include "llvm/ADT/Sequence.h" | ||||||
13 | #include "llvm/ADT/SetVector.h" | ||||||
14 | #include "llvm/ADT/SmallPtrSet.h" | ||||||
15 | #include "llvm/ADT/SmallVector.h" | ||||||
16 | #include "llvm/ADT/Statistic.h" | ||||||
17 | #include "llvm/ADT/Twine.h" | ||||||
18 | #include "llvm/Analysis/AssumptionCache.h" | ||||||
19 | #include "llvm/Analysis/BlockFrequencyInfo.h" | ||||||
20 | #include "llvm/Analysis/CFG.h" | ||||||
21 | #include "llvm/Analysis/CodeMetrics.h" | ||||||
22 | #include "llvm/Analysis/DomTreeUpdater.h" | ||||||
23 | #include "llvm/Analysis/GuardUtils.h" | ||||||
24 | #include "llvm/Analysis/LoopAnalysisManager.h" | ||||||
25 | #include "llvm/Analysis/LoopInfo.h" | ||||||
26 | #include "llvm/Analysis/LoopIterator.h" | ||||||
27 | #include "llvm/Analysis/LoopPass.h" | ||||||
28 | #include "llvm/Analysis/MemorySSA.h" | ||||||
29 | #include "llvm/Analysis/MemorySSAUpdater.h" | ||||||
30 | #include "llvm/Analysis/MustExecute.h" | ||||||
31 | #include "llvm/Analysis/ProfileSummaryInfo.h" | ||||||
32 | #include "llvm/Analysis/ScalarEvolution.h" | ||||||
33 | #include "llvm/Analysis/TargetTransformInfo.h" | ||||||
34 | #include "llvm/Analysis/ValueTracking.h" | ||||||
35 | #include "llvm/IR/BasicBlock.h" | ||||||
36 | #include "llvm/IR/Constant.h" | ||||||
37 | #include "llvm/IR/Constants.h" | ||||||
38 | #include "llvm/IR/Dominators.h" | ||||||
39 | #include "llvm/IR/Function.h" | ||||||
40 | #include "llvm/IR/IRBuilder.h" | ||||||
41 | #include "llvm/IR/InstrTypes.h" | ||||||
42 | #include "llvm/IR/Instruction.h" | ||||||
43 | #include "llvm/IR/Instructions.h" | ||||||
44 | #include "llvm/IR/IntrinsicInst.h" | ||||||
45 | #include "llvm/IR/PatternMatch.h" | ||||||
46 | #include "llvm/IR/ProfDataUtils.h" | ||||||
47 | #include "llvm/IR/Use.h" | ||||||
48 | #include "llvm/IR/Value.h" | ||||||
49 | #include "llvm/InitializePasses.h" | ||||||
50 | #include "llvm/Pass.h" | ||||||
51 | #include "llvm/Support/Casting.h" | ||||||
52 | #include "llvm/Support/CommandLine.h" | ||||||
53 | #include "llvm/Support/Debug.h" | ||||||
54 | #include "llvm/Support/ErrorHandling.h" | ||||||
55 | #include "llvm/Support/GenericDomTree.h" | ||||||
56 | #include "llvm/Support/InstructionCost.h" | ||||||
57 | #include "llvm/Support/raw_ostream.h" | ||||||
58 | #include "llvm/Transforms/Scalar/LoopPassManager.h" | ||||||
59 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" | ||||||
60 | #include "llvm/Transforms/Utils/Cloning.h" | ||||||
61 | #include "llvm/Transforms/Utils/Local.h" | ||||||
62 | #include "llvm/Transforms/Utils/LoopUtils.h" | ||||||
63 | #include "llvm/Transforms/Utils/ValueMapper.h" | ||||||
64 | #include <algorithm> | ||||||
65 | #include <cassert> | ||||||
66 | #include <iterator> | ||||||
67 | #include <numeric> | ||||||
68 | #include <optional> | ||||||
69 | #include <utility> | ||||||
70 | |||||||
71 | #define DEBUG_TYPE"simple-loop-unswitch" "simple-loop-unswitch" | ||||||
72 | |||||||
73 | using namespace llvm; | ||||||
74 | using namespace llvm::PatternMatch; | ||||||
75 | |||||||
76 | STATISTIC(NumBranches, "Number of branches unswitched")static llvm::Statistic NumBranches = {"simple-loop-unswitch", "NumBranches", "Number of branches unswitched"}; | ||||||
77 | STATISTIC(NumSwitches, "Number of switches unswitched")static llvm::Statistic NumSwitches = {"simple-loop-unswitch", "NumSwitches", "Number of switches unswitched"}; | ||||||
78 | STATISTIC(NumSelects, "Number of selects turned into branches for unswitching")static llvm::Statistic NumSelects = {"simple-loop-unswitch", "NumSelects" , "Number of selects turned into branches for unswitching"}; | ||||||
79 | STATISTIC(NumGuards, "Number of guards turned into branches for unswitching")static llvm::Statistic NumGuards = {"simple-loop-unswitch", "NumGuards" , "Number of guards turned into branches for unswitching"}; | ||||||
80 | STATISTIC(NumTrivial, "Number of unswitches that are trivial")static llvm::Statistic NumTrivial = {"simple-loop-unswitch", "NumTrivial" , "Number of unswitches that are trivial"}; | ||||||
81 | STATISTIC(static llvm::Statistic NumCostMultiplierSkipped = {"simple-loop-unswitch" , "NumCostMultiplierSkipped", "Number of unswitch candidates that had their cost multiplier skipped" } | ||||||
82 | NumCostMultiplierSkipped,static llvm::Statistic NumCostMultiplierSkipped = {"simple-loop-unswitch" , "NumCostMultiplierSkipped", "Number of unswitch candidates that had their cost multiplier skipped" } | ||||||
83 | "Number of unswitch candidates that had their cost multiplier skipped")static llvm::Statistic NumCostMultiplierSkipped = {"simple-loop-unswitch" , "NumCostMultiplierSkipped", "Number of unswitch candidates that had their cost multiplier skipped" }; | ||||||
84 | STATISTIC(NumInvariantConditionsInjected,static llvm::Statistic NumInvariantConditionsInjected = {"simple-loop-unswitch" , "NumInvariantConditionsInjected", "Number of invariant conditions injected and unswitched" } | ||||||
85 | "Number of invariant conditions injected and unswitched")static llvm::Statistic NumInvariantConditionsInjected = {"simple-loop-unswitch" , "NumInvariantConditionsInjected", "Number of invariant conditions injected and unswitched" }; | ||||||
86 | |||||||
87 | static cl::opt<bool> EnableNonTrivialUnswitch( | ||||||
88 | "enable-nontrivial-unswitch", cl::init(false), cl::Hidden, | ||||||
89 | cl::desc("Forcibly enables non-trivial loop unswitching rather than " | ||||||
90 | "following the configuration passed into the pass.")); | ||||||
91 | |||||||
92 | static cl::opt<int> | ||||||
93 | UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, | ||||||
94 | cl::desc("The cost threshold for unswitching a loop.")); | ||||||
95 | |||||||
96 | static cl::opt<bool> EnableUnswitchCostMultiplier( | ||||||
97 | "enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden, | ||||||
98 | cl::desc("Enable unswitch cost multiplier that prohibits exponential " | ||||||
99 | "explosion in nontrivial unswitch.")); | ||||||
100 | static cl::opt<int> UnswitchSiblingsToplevelDiv( | ||||||
101 | "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden, | ||||||
102 | cl::desc("Toplevel siblings divisor for cost multiplier.")); | ||||||
103 | static cl::opt<int> UnswitchNumInitialUnscaledCandidates( | ||||||
104 | "unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden, | ||||||
105 | cl::desc("Number of unswitch candidates that are ignored when calculating " | ||||||
106 | "cost multiplier.")); | ||||||
107 | static cl::opt<bool> UnswitchGuards( | ||||||
108 | "simple-loop-unswitch-guards", cl::init(true), cl::Hidden, | ||||||
109 | cl::desc("If enabled, simple loop unswitching will also consider " | ||||||
110 | "llvm.experimental.guard intrinsics as unswitch candidates.")); | ||||||
111 | static cl::opt<bool> DropNonTrivialImplicitNullChecks( | ||||||
112 | "simple-loop-unswitch-drop-non-trivial-implicit-null-checks", | ||||||
113 | cl::init(false), cl::Hidden, | ||||||
114 | cl::desc("If enabled, drop make.implicit metadata in unswitched implicit " | ||||||
115 | "null checks to save time analyzing if we can keep it.")); | ||||||
116 | static cl::opt<unsigned> | ||||||
117 | MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", | ||||||
118 | cl::desc("Max number of memory uses to explore during " | ||||||
119 | "partial unswitching analysis"), | ||||||
120 | cl::init(100), cl::Hidden); | ||||||
121 | static cl::opt<bool> FreezeLoopUnswitchCond( | ||||||
122 | "freeze-loop-unswitch-cond", cl::init(true), cl::Hidden, | ||||||
123 | cl::desc("If enabled, the freeze instruction will be added to condition " | ||||||
124 | "of loop unswitch to prevent miscompilation.")); | ||||||
125 | |||||||
126 | static cl::opt<bool> InjectInvariantConditions( | ||||||
127 | "simple-loop-unswitch-inject-invariant-conditions", cl::Hidden, | ||||||
128 | cl::desc("Whether we should inject new invariants and unswitch them to " | ||||||
129 | "eliminate some existing (non-invariant) conditions."), | ||||||
130 | cl::init(true)); | ||||||
131 | |||||||
132 | static cl::opt<unsigned> InjectInvariantConditionHotnesThreshold( | ||||||
133 | "simple-loop-unswitch-inject-invariant-condition-hotness-threshold", | ||||||
134 | cl::Hidden, cl::desc("Only try to inject loop invariant conditions and " | ||||||
135 | "unswitch on them to eliminate branches that are " | ||||||
136 | "not-taken 1/<this option> times or less."), | ||||||
137 | cl::init(16)); | ||||||
138 | |||||||
139 | namespace { | ||||||
140 | struct CompareDesc { | ||||||
141 | BranchInst *Term; | ||||||
142 | Value *Invariant; | ||||||
143 | BasicBlock *InLoopSucc; | ||||||
144 | |||||||
145 | CompareDesc(BranchInst *Term, Value *Invariant, BasicBlock *InLoopSucc) | ||||||
146 | : Term(Term), Invariant(Invariant), InLoopSucc(InLoopSucc) {} | ||||||
147 | }; | ||||||
148 | |||||||
149 | struct InjectedInvariant { | ||||||
150 | ICmpInst::Predicate Pred; | ||||||
151 | Value *LHS; | ||||||
152 | Value *RHS; | ||||||
153 | BasicBlock *InLoopSucc; | ||||||
154 | |||||||
155 | InjectedInvariant(ICmpInst::Predicate Pred, Value *LHS, Value *RHS, | ||||||
156 | BasicBlock *InLoopSucc) | ||||||
157 | : Pred(Pred), LHS(LHS), RHS(RHS), InLoopSucc(InLoopSucc) {} | ||||||
158 | }; | ||||||
159 | |||||||
160 | struct NonTrivialUnswitchCandidate { | ||||||
161 | Instruction *TI = nullptr; | ||||||
162 | TinyPtrVector<Value *> Invariants; | ||||||
163 | std::optional<InstructionCost> Cost; | ||||||
164 | std::optional<InjectedInvariant> PendingInjection; | ||||||
165 | NonTrivialUnswitchCandidate( | ||||||
166 | Instruction *TI, ArrayRef<Value *> Invariants, | ||||||
167 | std::optional<InstructionCost> Cost = std::nullopt, | ||||||
168 | std::optional<InjectedInvariant> PendingInjection = std::nullopt) | ||||||
169 | : TI(TI), Invariants(Invariants), Cost(Cost), | ||||||
170 | PendingInjection(PendingInjection) {}; | ||||||
171 | |||||||
172 | bool hasPendingInjection() const { return PendingInjection.has_value(); } | ||||||
173 | }; | ||||||
174 | } // end anonymous namespace. | ||||||
175 | |||||||
176 | // Helper to skip (select x, true, false), which matches both a logical AND and | ||||||
177 | // OR and can confuse code that tries to determine if \p Cond is either a | ||||||
178 | // logical AND or OR but not both. | ||||||
179 | static Value *skipTrivialSelect(Value *Cond) { | ||||||
180 | Value *CondNext; | ||||||
181 | while (match(Cond, m_Select(m_Value(CondNext), m_One(), m_Zero()))) | ||||||
182 | Cond = CondNext; | ||||||
183 | return Cond; | ||||||
184 | } | ||||||
185 | |||||||
186 | /// Collect all of the loop invariant input values transitively used by the | ||||||
187 | /// homogeneous instruction graph from a given root. | ||||||
188 | /// | ||||||
189 | /// This essentially walks from a root recursively through loop variant operands | ||||||
190 | /// which have perform the same logical operation (AND or OR) and finds all | ||||||
191 | /// inputs which are loop invariant. For some operations these can be | ||||||
192 | /// re-associated and unswitched out of the loop entirely. | ||||||
193 | static TinyPtrVector<Value *> | ||||||
194 | collectHomogenousInstGraphLoopInvariants(const Loop &L, Instruction &Root, | ||||||
195 | const LoopInfo &LI) { | ||||||
196 | assert(!L.isLoopInvariant(&Root) &&(static_cast <bool> (!L.isLoopInvariant(&Root) && "Only need to walk the graph if root itself is not invariant." ) ? void (0) : __assert_fail ("!L.isLoopInvariant(&Root) && \"Only need to walk the graph if root itself is not invariant.\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 197, __extension__ __PRETTY_FUNCTION__)) | ||||||
197 | "Only need to walk the graph if root itself is not invariant.")(static_cast <bool> (!L.isLoopInvariant(&Root) && "Only need to walk the graph if root itself is not invariant." ) ? void (0) : __assert_fail ("!L.isLoopInvariant(&Root) && \"Only need to walk the graph if root itself is not invariant.\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 197, __extension__ __PRETTY_FUNCTION__)); | ||||||
198 | TinyPtrVector<Value *> Invariants; | ||||||
199 | |||||||
200 | bool IsRootAnd = match(&Root, m_LogicalAnd()); | ||||||
201 | bool IsRootOr = match(&Root, m_LogicalOr()); | ||||||
202 | |||||||
203 | // Build a worklist and recurse through operators collecting invariants. | ||||||
204 | SmallVector<Instruction *, 4> Worklist; | ||||||
205 | SmallPtrSet<Instruction *, 8> Visited; | ||||||
206 | Worklist.push_back(&Root); | ||||||
207 | Visited.insert(&Root); | ||||||
208 | do { | ||||||
209 | Instruction &I = *Worklist.pop_back_val(); | ||||||
210 | for (Value *OpV : I.operand_values()) { | ||||||
211 | // Skip constants as unswitching isn't interesting for them. | ||||||
212 | if (isa<Constant>(OpV)) | ||||||
213 | continue; | ||||||
214 | |||||||
215 | // Add it to our result if loop invariant. | ||||||
216 | if (L.isLoopInvariant(OpV)) { | ||||||
217 | Invariants.push_back(OpV); | ||||||
218 | continue; | ||||||
219 | } | ||||||
220 | |||||||
221 | // If not an instruction with the same opcode, nothing we can do. | ||||||
222 | Instruction *OpI = dyn_cast<Instruction>(skipTrivialSelect(OpV)); | ||||||
223 | |||||||
224 | if (OpI && ((IsRootAnd && match(OpI, m_LogicalAnd())) || | ||||||
225 | (IsRootOr && match(OpI, m_LogicalOr())))) { | ||||||
226 | // Visit this operand. | ||||||
227 | if (Visited.insert(OpI).second) | ||||||
228 | Worklist.push_back(OpI); | ||||||
229 | } | ||||||
230 | } | ||||||
231 | } while (!Worklist.empty()); | ||||||
232 | |||||||
233 | return Invariants; | ||||||
234 | } | ||||||
235 | |||||||
236 | static void replaceLoopInvariantUses(const Loop &L, Value *Invariant, | ||||||
237 | Constant &Replacement) { | ||||||
238 | assert(!isa<Constant>(Invariant) && "Why are we unswitching on a constant?")(static_cast <bool> (!isa<Constant>(Invariant) && "Why are we unswitching on a constant?") ? void (0) : __assert_fail ("!isa<Constant>(Invariant) && \"Why are we unswitching on a constant?\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 238, __extension__ __PRETTY_FUNCTION__)); | ||||||
239 | |||||||
240 | // Replace uses of LIC in the loop with the given constant. | ||||||
241 | // We use make_early_inc_range as set invalidates the iterator. | ||||||
242 | for (Use &U : llvm::make_early_inc_range(Invariant->uses())) { | ||||||
243 | Instruction *UserI = dyn_cast<Instruction>(U.getUser()); | ||||||
244 | |||||||
245 | // Replace this use within the loop body. | ||||||
246 | if (UserI && L.contains(UserI)) | ||||||
247 | U.set(&Replacement); | ||||||
248 | } | ||||||
249 | } | ||||||
250 | |||||||
251 | /// Check that all the LCSSA PHI nodes in the loop exit block have trivial | ||||||
252 | /// incoming values along this edge. | ||||||
253 | static bool areLoopExitPHIsLoopInvariant(const Loop &L, | ||||||
254 | const BasicBlock &ExitingBB, | ||||||
255 | const BasicBlock &ExitBB) { | ||||||
256 | for (const Instruction &I : ExitBB) { | ||||||
257 | auto *PN = dyn_cast<PHINode>(&I); | ||||||
258 | if (!PN) | ||||||
259 | // No more PHIs to check. | ||||||
260 | return true; | ||||||
261 | |||||||
262 | // If the incoming value for this edge isn't loop invariant the unswitch | ||||||
263 | // won't be trivial. | ||||||
264 | if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB))) | ||||||
265 | return false; | ||||||
266 | } | ||||||
267 | llvm_unreachable("Basic blocks should never be empty!")::llvm::llvm_unreachable_internal("Basic blocks should never be empty!" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 267); | ||||||
268 | } | ||||||
269 | |||||||
270 | /// Copy a set of loop invariant values \p ToDuplicate and insert them at the | ||||||
271 | /// end of \p BB and conditionally branch on the copied condition. We only | ||||||
272 | /// branch on a single value. | ||||||
273 | static void buildPartialUnswitchConditionalBranch( | ||||||
274 | BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction, | ||||||
275 | BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze, | ||||||
276 | const Instruction *I, AssumptionCache *AC, const DominatorTree &DT) { | ||||||
277 | IRBuilder<> IRB(&BB); | ||||||
278 | |||||||
279 | SmallVector<Value *> FrozenInvariants; | ||||||
280 | for (Value *Inv : Invariants) { | ||||||
281 | if (InsertFreeze && !isGuaranteedNotToBeUndefOrPoison(Inv, AC, I, &DT)) | ||||||
282 | Inv = IRB.CreateFreeze(Inv, Inv->getName() + ".fr"); | ||||||
283 | FrozenInvariants.push_back(Inv); | ||||||
284 | } | ||||||
285 | |||||||
286 | Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants) | ||||||
287 | : IRB.CreateAnd(FrozenInvariants); | ||||||
288 | IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc, | ||||||
289 | Direction ? &NormalSucc : &UnswitchedSucc); | ||||||
290 | } | ||||||
291 | |||||||
292 | /// Copy a set of loop invariant values, and conditionally branch on them. | ||||||
293 | static void buildPartialInvariantUnswitchConditionalBranch( | ||||||
294 | BasicBlock &BB, ArrayRef<Value *> ToDuplicate, bool Direction, | ||||||
295 | BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L, | ||||||
296 | MemorySSAUpdater *MSSAU) { | ||||||
297 | ValueToValueMapTy VMap; | ||||||
298 | for (auto *Val : reverse(ToDuplicate)) { | ||||||
299 | Instruction *Inst = cast<Instruction>(Val); | ||||||
300 | Instruction *NewInst = Inst->clone(); | ||||||
301 | NewInst->insertInto(&BB, BB.end()); | ||||||
302 | RemapInstruction(NewInst, VMap, | ||||||
303 | RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); | ||||||
304 | VMap[Val] = NewInst; | ||||||
305 | |||||||
306 | if (!MSSAU) | ||||||
307 | continue; | ||||||
308 | |||||||
309 | MemorySSA *MSSA = MSSAU->getMemorySSA(); | ||||||
310 | if (auto *MemUse = | ||||||
311 | dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(Inst))) { | ||||||
312 | auto *DefiningAccess = MemUse->getDefiningAccess(); | ||||||
313 | // Get the first defining access before the loop. | ||||||
314 | while (L.contains(DefiningAccess->getBlock())) { | ||||||
315 | // If the defining access is a MemoryPhi, get the incoming | ||||||
316 | // value for the pre-header as defining access. | ||||||
317 | if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess)) | ||||||
318 | DefiningAccess = | ||||||
319 | MemPhi->getIncomingValueForBlock(L.getLoopPreheader()); | ||||||
320 | else | ||||||
321 | DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess(); | ||||||
322 | } | ||||||
323 | MSSAU->createMemoryAccessInBB(NewInst, DefiningAccess, | ||||||
324 | NewInst->getParent(), | ||||||
325 | MemorySSA::BeforeTerminator); | ||||||
326 | } | ||||||
327 | } | ||||||
328 | |||||||
329 | IRBuilder<> IRB(&BB); | ||||||
330 | Value *Cond = VMap[ToDuplicate[0]]; | ||||||
331 | IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc, | ||||||
332 | Direction ? &NormalSucc : &UnswitchedSucc); | ||||||
333 | } | ||||||
334 | |||||||
335 | /// Rewrite the PHI nodes in an unswitched loop exit basic block. | ||||||
336 | /// | ||||||
337 | /// Requires that the loop exit and unswitched basic block are the same, and | ||||||
338 | /// that the exiting block was a unique predecessor of that block. Rewrites the | ||||||
339 | /// PHI nodes in that block such that what were LCSSA PHI nodes become trivial | ||||||
340 | /// PHI nodes from the old preheader that now contains the unswitched | ||||||
341 | /// terminator. | ||||||
342 | static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, | ||||||
343 | BasicBlock &OldExitingBB, | ||||||
344 | BasicBlock &OldPH) { | ||||||
345 | for (PHINode &PN : UnswitchedBB.phis()) { | ||||||
346 | // When the loop exit is directly unswitched we just need to update the | ||||||
347 | // incoming basic block. We loop to handle weird cases with repeated | ||||||
348 | // incoming blocks, but expect to typically only have one operand here. | ||||||
349 | for (auto i : seq<int>(0, PN.getNumOperands())) { | ||||||
350 | assert(PN.getIncomingBlock(i) == &OldExitingBB &&(static_cast <bool> (PN.getIncomingBlock(i) == &OldExitingBB && "Found incoming block different from unique predecessor!" ) ? void (0) : __assert_fail ("PN.getIncomingBlock(i) == &OldExitingBB && \"Found incoming block different from unique predecessor!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 351, __extension__ __PRETTY_FUNCTION__)) | ||||||
351 | "Found incoming block different from unique predecessor!")(static_cast <bool> (PN.getIncomingBlock(i) == &OldExitingBB && "Found incoming block different from unique predecessor!" ) ? void (0) : __assert_fail ("PN.getIncomingBlock(i) == &OldExitingBB && \"Found incoming block different from unique predecessor!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 351, __extension__ __PRETTY_FUNCTION__)); | ||||||
352 | PN.setIncomingBlock(i, &OldPH); | ||||||
353 | } | ||||||
354 | } | ||||||
355 | } | ||||||
356 | |||||||
357 | /// Rewrite the PHI nodes in the loop exit basic block and the split off | ||||||
358 | /// unswitched block. | ||||||
359 | /// | ||||||
360 | /// Because the exit block remains an exit from the loop, this rewrites the | ||||||
361 | /// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI | ||||||
362 | /// nodes into the unswitched basic block to select between the value in the | ||||||
363 | /// old preheader and the loop exit. | ||||||
364 | static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, | ||||||
365 | BasicBlock &UnswitchedBB, | ||||||
366 | BasicBlock &OldExitingBB, | ||||||
367 | BasicBlock &OldPH, | ||||||
368 | bool FullUnswitch) { | ||||||
369 | assert(&ExitBB != &UnswitchedBB &&(static_cast <bool> (&ExitBB != &UnswitchedBB && "Must have different loop exit and unswitched blocks!") ? void (0) : __assert_fail ("&ExitBB != &UnswitchedBB && \"Must have different loop exit and unswitched blocks!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 370, __extension__ __PRETTY_FUNCTION__)) | ||||||
370 | "Must have different loop exit and unswitched blocks!")(static_cast <bool> (&ExitBB != &UnswitchedBB && "Must have different loop exit and unswitched blocks!") ? void (0) : __assert_fail ("&ExitBB != &UnswitchedBB && \"Must have different loop exit and unswitched blocks!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 370, __extension__ __PRETTY_FUNCTION__)); | ||||||
371 | Instruction *InsertPt = &*UnswitchedBB.begin(); | ||||||
372 | for (PHINode &PN : ExitBB.phis()) { | ||||||
373 | auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2, | ||||||
374 | PN.getName() + ".split", InsertPt); | ||||||
375 | |||||||
376 | // Walk backwards over the old PHI node's inputs to minimize the cost of | ||||||
377 | // removing each one. We have to do this weird loop manually so that we | ||||||
378 | // create the same number of new incoming edges in the new PHI as we expect | ||||||
379 | // each case-based edge to be included in the unswitched switch in some | ||||||
380 | // cases. | ||||||
381 | // FIXME: This is really, really gross. It would be much cleaner if LLVM | ||||||
382 | // allowed us to create a single entry for a predecessor block without | ||||||
383 | // having separate entries for each "edge" even though these edges are | ||||||
384 | // required to produce identical results. | ||||||
385 | for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) { | ||||||
386 | if (PN.getIncomingBlock(i) != &OldExitingBB) | ||||||
387 | continue; | ||||||
388 | |||||||
389 | Value *Incoming = PN.getIncomingValue(i); | ||||||
390 | if (FullUnswitch) | ||||||
391 | // No more edge from the old exiting block to the exit block. | ||||||
392 | PN.removeIncomingValue(i); | ||||||
393 | |||||||
394 | NewPN->addIncoming(Incoming, &OldPH); | ||||||
395 | } | ||||||
396 | |||||||
397 | // Now replace the old PHI with the new one and wire the old one in as an | ||||||
398 | // input to the new one. | ||||||
399 | PN.replaceAllUsesWith(NewPN); | ||||||
400 | NewPN->addIncoming(&PN, &ExitBB); | ||||||
401 | } | ||||||
402 | } | ||||||
403 | |||||||
404 | /// Hoist the current loop up to the innermost loop containing a remaining exit. | ||||||
405 | /// | ||||||
406 | /// Because we've removed an exit from the loop, we may have changed the set of | ||||||
407 | /// loops reachable and need to move the current loop up the loop nest or even | ||||||
408 | /// to an entirely separate nest. | ||||||
409 | static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader, | ||||||
410 | DominatorTree &DT, LoopInfo &LI, | ||||||
411 | MemorySSAUpdater *MSSAU, ScalarEvolution *SE) { | ||||||
412 | // If the loop is already at the top level, we can't hoist it anywhere. | ||||||
413 | Loop *OldParentL = L.getParentLoop(); | ||||||
414 | if (!OldParentL) | ||||||
415 | return; | ||||||
416 | |||||||
417 | SmallVector<BasicBlock *, 4> Exits; | ||||||
418 | L.getExitBlocks(Exits); | ||||||
419 | Loop *NewParentL = nullptr; | ||||||
420 | for (auto *ExitBB : Exits) | ||||||
421 | if (Loop *ExitL = LI.getLoopFor(ExitBB)) | ||||||
422 | if (!NewParentL || NewParentL->contains(ExitL)) | ||||||
423 | NewParentL = ExitL; | ||||||
424 | |||||||
425 | if (NewParentL == OldParentL) | ||||||
426 | return; | ||||||
427 | |||||||
428 | // The new parent loop (if different) should always contain the old one. | ||||||
429 | if (NewParentL) | ||||||
430 | assert(NewParentL->contains(OldParentL) &&(static_cast <bool> (NewParentL->contains(OldParentL ) && "Can only hoist this loop up the nest!") ? void ( 0) : __assert_fail ("NewParentL->contains(OldParentL) && \"Can only hoist this loop up the nest!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 431, __extension__ __PRETTY_FUNCTION__)) | ||||||
431 | "Can only hoist this loop up the nest!")(static_cast <bool> (NewParentL->contains(OldParentL ) && "Can only hoist this loop up the nest!") ? void ( 0) : __assert_fail ("NewParentL->contains(OldParentL) && \"Can only hoist this loop up the nest!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 431, __extension__ __PRETTY_FUNCTION__)); | ||||||
432 | |||||||
433 | // The preheader will need to move with the body of this loop. However, | ||||||
434 | // because it isn't in this loop we also need to update the primary loop map. | ||||||
435 | assert(OldParentL == LI.getLoopFor(&Preheader) &&(static_cast <bool> (OldParentL == LI.getLoopFor(&Preheader ) && "Parent loop of this loop should contain this loop's preheader!" ) ? void (0) : __assert_fail ("OldParentL == LI.getLoopFor(&Preheader) && \"Parent loop of this loop should contain this loop's preheader!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 436, __extension__ __PRETTY_FUNCTION__)) | ||||||
436 | "Parent loop of this loop should contain this loop's preheader!")(static_cast <bool> (OldParentL == LI.getLoopFor(&Preheader ) && "Parent loop of this loop should contain this loop's preheader!" ) ? void (0) : __assert_fail ("OldParentL == LI.getLoopFor(&Preheader) && \"Parent loop of this loop should contain this loop's preheader!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 436, __extension__ __PRETTY_FUNCTION__)); | ||||||
437 | LI.changeLoopFor(&Preheader, NewParentL); | ||||||
438 | |||||||
439 | // Remove this loop from its old parent. | ||||||
440 | OldParentL->removeChildLoop(&L); | ||||||
441 | |||||||
442 | // Add the loop either to the new parent or as a top-level loop. | ||||||
443 | if (NewParentL) | ||||||
444 | NewParentL->addChildLoop(&L); | ||||||
445 | else | ||||||
446 | LI.addTopLevelLoop(&L); | ||||||
447 | |||||||
448 | // Remove this loops blocks from the old parent and every other loop up the | ||||||
449 | // nest until reaching the new parent. Also update all of these | ||||||
450 | // no-longer-containing loops to reflect the nesting change. | ||||||
451 | for (Loop *OldContainingL = OldParentL; OldContainingL != NewParentL; | ||||||
452 | OldContainingL = OldContainingL->getParentLoop()) { | ||||||
453 | llvm::erase_if(OldContainingL->getBlocksVector(), | ||||||
454 | [&](const BasicBlock *BB) { | ||||||
455 | return BB == &Preheader || L.contains(BB); | ||||||
456 | }); | ||||||
457 | |||||||
458 | OldContainingL->getBlocksSet().erase(&Preheader); | ||||||
459 | for (BasicBlock *BB : L.blocks()) | ||||||
460 | OldContainingL->getBlocksSet().erase(BB); | ||||||
461 | |||||||
462 | // Because we just hoisted a loop out of this one, we have essentially | ||||||
463 | // created new exit paths from it. That means we need to form LCSSA PHI | ||||||
464 | // nodes for values used in the no-longer-nested loop. | ||||||
465 | formLCSSA(*OldContainingL, DT, &LI); | ||||||
466 | |||||||
467 | // We shouldn't need to form dedicated exits because the exit introduced | ||||||
468 | // here is the (just split by unswitching) preheader. However, after trivial | ||||||
469 | // unswitching it is possible to get new non-dedicated exits out of parent | ||||||
470 | // loop so let's conservatively form dedicated exit blocks and figure out | ||||||
471 | // if we can optimize later. | ||||||
472 | formDedicatedExitBlocks(OldContainingL, &DT, &LI, MSSAU, | ||||||
473 | /*PreserveLCSSA*/ true); | ||||||
474 | } | ||||||
475 | } | ||||||
476 | |||||||
477 | // Return the top-most loop containing ExitBB and having ExitBB as exiting block | ||||||
478 | // or the loop containing ExitBB, if there is no parent loop containing ExitBB | ||||||
479 | // as exiting block. | ||||||
480 | static Loop *getTopMostExitingLoop(const BasicBlock *ExitBB, | ||||||
481 | const LoopInfo &LI) { | ||||||
482 | Loop *TopMost = LI.getLoopFor(ExitBB); | ||||||
483 | Loop *Current = TopMost; | ||||||
484 | while (Current) { | ||||||
485 | if (Current->isLoopExiting(ExitBB)) | ||||||
486 | TopMost = Current; | ||||||
487 | Current = Current->getParentLoop(); | ||||||
488 | } | ||||||
489 | return TopMost; | ||||||
490 | } | ||||||
491 | |||||||
492 | /// Unswitch a trivial branch if the condition is loop invariant. | ||||||
493 | /// | ||||||
494 | /// This routine should only be called when loop code leading to the branch has | ||||||
495 | /// been validated as trivial (no side effects). This routine checks if the | ||||||
496 | /// condition is invariant and one of the successors is a loop exit. This | ||||||
497 | /// allows us to unswitch without duplicating the loop, making it trivial. | ||||||
498 | /// | ||||||
499 | /// If this routine fails to unswitch the branch it returns false. | ||||||
500 | /// | ||||||
501 | /// If the branch can be unswitched, this routine splits the preheader and | ||||||
502 | /// hoists the branch above that split. Preserves loop simplified form | ||||||
503 | /// (splitting the exit block as necessary). It simplifies the branch within | ||||||
504 | /// the loop to an unconditional branch but doesn't remove it entirely. Further | ||||||
505 | /// cleanup can be done with some simplifycfg like pass. | ||||||
506 | /// | ||||||
507 | /// If `SE` is not null, it will be updated based on the potential loop SCEVs | ||||||
508 | /// invalidated by this. | ||||||
509 | static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, | ||||||
510 | LoopInfo &LI, ScalarEvolution *SE, | ||||||
511 | MemorySSAUpdater *MSSAU) { | ||||||
512 | assert(BI.isConditional() && "Can only unswitch a conditional branch!")(static_cast <bool> (BI.isConditional() && "Can only unswitch a conditional branch!" ) ? void (0) : __assert_fail ("BI.isConditional() && \"Can only unswitch a conditional branch!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 512, __extension__ __PRETTY_FUNCTION__)); | ||||||
513 | LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << " Trying to unswitch branch: " << BI << "\n"; } } while (false); | ||||||
514 | |||||||
515 | // The loop invariant values that we want to unswitch. | ||||||
516 | TinyPtrVector<Value *> Invariants; | ||||||
517 | |||||||
518 | // When true, we're fully unswitching the branch rather than just unswitching | ||||||
519 | // some input conditions to the branch. | ||||||
520 | bool FullUnswitch = false; | ||||||
521 | |||||||
522 | Value *Cond = skipTrivialSelect(BI.getCondition()); | ||||||
523 | if (L.isLoopInvariant(Cond)) { | ||||||
524 | Invariants.push_back(Cond); | ||||||
525 | FullUnswitch = true; | ||||||
526 | } else { | ||||||
527 | if (auto *CondInst = dyn_cast<Instruction>(Cond)) | ||||||
528 | Invariants = collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI); | ||||||
529 | if (Invariants.empty()) { | ||||||
530 | LLVM_DEBUG(dbgs() << " Couldn't find invariant inputs!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << " Couldn't find invariant inputs!\n" ; } } while (false); | ||||||
531 | return false; | ||||||
532 | } | ||||||
533 | } | ||||||
534 | |||||||
535 | // Check that one of the branch's successors exits, and which one. | ||||||
536 | bool ExitDirection = true; | ||||||
537 | int LoopExitSuccIdx = 0; | ||||||
538 | auto *LoopExitBB = BI.getSuccessor(0); | ||||||
539 | if (L.contains(LoopExitBB)) { | ||||||
540 | ExitDirection = false; | ||||||
541 | LoopExitSuccIdx = 1; | ||||||
542 | LoopExitBB = BI.getSuccessor(1); | ||||||
543 | if (L.contains(LoopExitBB)) { | ||||||
544 | LLVM_DEBUG(dbgs() << " Branch doesn't exit the loop!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << " Branch doesn't exit the loop!\n" ; } } while (false); | ||||||
545 | return false; | ||||||
546 | } | ||||||
547 | } | ||||||
548 | auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx); | ||||||
549 | auto *ParentBB = BI.getParent(); | ||||||
550 | if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB)) { | ||||||
551 | LLVM_DEBUG(dbgs() << " Loop exit PHI's aren't loop-invariant!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << " Loop exit PHI's aren't loop-invariant!\n" ; } } while (false); | ||||||
552 | return false; | ||||||
553 | } | ||||||
554 | |||||||
555 | // When unswitching only part of the branch's condition, we need the exit | ||||||
556 | // block to be reached directly from the partially unswitched input. This can | ||||||
557 | // be done when the exit block is along the true edge and the branch condition | ||||||
558 | // is a graph of `or` operations, or the exit block is along the false edge | ||||||
559 | // and the condition is a graph of `and` operations. | ||||||
560 | if (!FullUnswitch) { | ||||||
561 | if (ExitDirection ? !match(Cond, m_LogicalOr()) | ||||||
562 | : !match(Cond, m_LogicalAnd())) { | ||||||
563 | LLVM_DEBUG(dbgs() << " Branch condition is in improper form for "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << " Branch condition is in improper form for " "non-full unswitch!\n"; } } while (false) | ||||||
564 | "non-full unswitch!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << " Branch condition is in improper form for " "non-full unswitch!\n"; } } while (false); | ||||||
565 | return false; | ||||||
566 | } | ||||||
567 | } | ||||||
568 | |||||||
569 | LLVM_DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: " << BI << "\n"; for (Value *Invariant : Invariants ) { dbgs() << " " << *Invariant << " == true" ; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs () << "\n"; } }; } } while (false) | ||||||
570 | dbgs() << " unswitching trivial invariant conditions for: " << BIdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: " << BI << "\n"; for (Value *Invariant : Invariants ) { dbgs() << " " << *Invariant << " == true" ; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs () << "\n"; } }; } } while (false) | ||||||
571 | << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: " << BI << "\n"; for (Value *Invariant : Invariants ) { dbgs() << " " << *Invariant << " == true" ; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs () << "\n"; } }; } } while (false) | ||||||
572 | for (Value *Invariant : Invariants) {do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: " << BI << "\n"; for (Value *Invariant : Invariants ) { dbgs() << " " << *Invariant << " == true" ; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs () << "\n"; } }; } } while (false) | ||||||
573 | dbgs() << " " << *Invariant << " == true";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: " << BI << "\n"; for (Value *Invariant : Invariants ) { dbgs() << " " << *Invariant << " == true" ; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs () << "\n"; } }; } } while (false) | ||||||
574 | if (Invariant != Invariants.back())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: " << BI << "\n"; for (Value *Invariant : Invariants ) { dbgs() << " " << *Invariant << " == true" ; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs () << "\n"; } }; } } while (false) | ||||||
575 | dbgs() << " ||";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: " << BI << "\n"; for (Value *Invariant : Invariants ) { dbgs() << " " << *Invariant << " == true" ; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs () << "\n"; } }; } } while (false) | ||||||
576 | dbgs() << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: " << BI << "\n"; for (Value *Invariant : Invariants ) { dbgs() << " " << *Invariant << " == true" ; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs () << "\n"; } }; } } while (false) | ||||||
577 | }do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: " << BI << "\n"; for (Value *Invariant : Invariants ) { dbgs() << " " << *Invariant << " == true" ; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs () << "\n"; } }; } } while (false) | ||||||
578 | })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: " << BI << "\n"; for (Value *Invariant : Invariants ) { dbgs() << " " << *Invariant << " == true" ; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs () << "\n"; } }; } } while (false); | ||||||
579 | |||||||
580 | // If we have scalar evolutions, we need to invalidate them including this | ||||||
581 | // loop, the loop containing the exit block and the topmost parent loop | ||||||
582 | // exiting via LoopExitBB. | ||||||
583 | if (SE) { | ||||||
584 | if (const Loop *ExitL = getTopMostExitingLoop(LoopExitBB, LI)) | ||||||
585 | SE->forgetLoop(ExitL); | ||||||
586 | else | ||||||
587 | // Forget the entire nest as this exits the entire nest. | ||||||
588 | SE->forgetTopmostLoop(&L); | ||||||
589 | SE->forgetBlockAndLoopDispositions(); | ||||||
590 | } | ||||||
591 | |||||||
592 | if (MSSAU && VerifyMemorySSA) | ||||||
593 | MSSAU->getMemorySSA()->verifyMemorySSA(); | ||||||
594 | |||||||
595 | // Split the preheader, so that we know that there is a safe place to insert | ||||||
596 | // the conditional branch. We will change the preheader to have a conditional | ||||||
597 | // branch on LoopCond. | ||||||
598 | BasicBlock *OldPH = L.getLoopPreheader(); | ||||||
599 | BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU); | ||||||
600 | |||||||
601 | // Now that we have a place to insert the conditional branch, create a place | ||||||
602 | // to branch to: this is the exit block out of the loop that we are | ||||||
603 | // unswitching. We need to split this if there are other loop predecessors. | ||||||
604 | // Because the loop is in simplified form, *any* other predecessor is enough. | ||||||
605 | BasicBlock *UnswitchedBB; | ||||||
606 | if (FullUnswitch && LoopExitBB->getUniquePredecessor()) { | ||||||
607 | assert(LoopExitBB->getUniquePredecessor() == BI.getParent() &&(static_cast <bool> (LoopExitBB->getUniquePredecessor () == BI.getParent() && "A branch's parent isn't a predecessor!" ) ? void (0) : __assert_fail ("LoopExitBB->getUniquePredecessor() == BI.getParent() && \"A branch's parent isn't a predecessor!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 608, __extension__ __PRETTY_FUNCTION__)) | ||||||
608 | "A branch's parent isn't a predecessor!")(static_cast <bool> (LoopExitBB->getUniquePredecessor () == BI.getParent() && "A branch's parent isn't a predecessor!" ) ? void (0) : __assert_fail ("LoopExitBB->getUniquePredecessor() == BI.getParent() && \"A branch's parent isn't a predecessor!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 608, __extension__ __PRETTY_FUNCTION__)); | ||||||
609 | UnswitchedBB = LoopExitBB; | ||||||
610 | } else { | ||||||
611 | UnswitchedBB = | ||||||
612 | SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI, MSSAU); | ||||||
613 | } | ||||||
614 | |||||||
615 | if (MSSAU && VerifyMemorySSA) | ||||||
616 | MSSAU->getMemorySSA()->verifyMemorySSA(); | ||||||
617 | |||||||
618 | // Actually move the invariant uses into the unswitched position. If possible, | ||||||
619 | // we do this by moving the instructions, but when doing partial unswitching | ||||||
620 | // we do it by building a new merge of the values in the unswitched position. | ||||||
621 | OldPH->getTerminator()->eraseFromParent(); | ||||||
622 | if (FullUnswitch) { | ||||||
623 | // If fully unswitching, we can use the existing branch instruction. | ||||||
624 | // Splice it into the old PH to gate reaching the new preheader and re-point | ||||||
625 | // its successors. | ||||||
626 | OldPH->splice(OldPH->end(), BI.getParent(), BI.getIterator()); | ||||||
627 | BI.setCondition(Cond); | ||||||
628 | if (MSSAU) { | ||||||
629 | // Temporarily clone the terminator, to make MSSA update cheaper by | ||||||
630 | // separating "insert edge" updates from "remove edge" ones. | ||||||
631 | BI.clone()->insertInto(ParentBB, ParentBB->end()); | ||||||
632 | } else { | ||||||
633 | // Create a new unconditional branch that will continue the loop as a new | ||||||
634 | // terminator. | ||||||
635 | BranchInst::Create(ContinueBB, ParentBB); | ||||||
636 | } | ||||||
637 | BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB); | ||||||
638 | BI.setSuccessor(1 - LoopExitSuccIdx, NewPH); | ||||||
639 | } else { | ||||||
640 | // Only unswitching a subset of inputs to the condition, so we will need to | ||||||
641 | // build a new branch that merges the invariant inputs. | ||||||
642 | if (ExitDirection) | ||||||
643 | assert(match(skipTrivialSelect(BI.getCondition()), m_LogicalOr()) &&(static_cast <bool> (match(skipTrivialSelect(BI.getCondition ()), m_LogicalOr()) && "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the " "condition!") ? void (0) : __assert_fail ("match(skipTrivialSelect(BI.getCondition()), m_LogicalOr()) && \"Must have an `or` of `i1`s or `select i1 X, true, Y`s for the \" \"condition!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 645, __extension__ __PRETTY_FUNCTION__)) | ||||||
644 | "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "(static_cast <bool> (match(skipTrivialSelect(BI.getCondition ()), m_LogicalOr()) && "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the " "condition!") ? void (0) : __assert_fail ("match(skipTrivialSelect(BI.getCondition()), m_LogicalOr()) && \"Must have an `or` of `i1`s or `select i1 X, true, Y`s for the \" \"condition!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 645, __extension__ __PRETTY_FUNCTION__)) | ||||||
645 | "condition!")(static_cast <bool> (match(skipTrivialSelect(BI.getCondition ()), m_LogicalOr()) && "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the " "condition!") ? void (0) : __assert_fail ("match(skipTrivialSelect(BI.getCondition()), m_LogicalOr()) && \"Must have an `or` of `i1`s or `select i1 X, true, Y`s for the \" \"condition!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 645, __extension__ __PRETTY_FUNCTION__)); | ||||||
646 | else | ||||||
647 | assert(match(skipTrivialSelect(BI.getCondition()), m_LogicalAnd()) &&(static_cast <bool> (match(skipTrivialSelect(BI.getCondition ()), m_LogicalAnd()) && "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the" " condition!") ? void (0) : __assert_fail ("match(skipTrivialSelect(BI.getCondition()), m_LogicalAnd()) && \"Must have an `and` of `i1`s or `select i1 X, Y, false`s for the\" \" condition!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 649, __extension__ __PRETTY_FUNCTION__)) | ||||||
648 | "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"(static_cast <bool> (match(skipTrivialSelect(BI.getCondition ()), m_LogicalAnd()) && "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the" " condition!") ? void (0) : __assert_fail ("match(skipTrivialSelect(BI.getCondition()), m_LogicalAnd()) && \"Must have an `and` of `i1`s or `select i1 X, Y, false`s for the\" \" condition!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 649, __extension__ __PRETTY_FUNCTION__)) | ||||||
649 | " condition!")(static_cast <bool> (match(skipTrivialSelect(BI.getCondition ()), m_LogicalAnd()) && "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the" " condition!") ? void (0) : __assert_fail ("match(skipTrivialSelect(BI.getCondition()), m_LogicalAnd()) && \"Must have an `and` of `i1`s or `select i1 X, Y, false`s for the\" \" condition!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 649, __extension__ __PRETTY_FUNCTION__)); | ||||||
650 | buildPartialUnswitchConditionalBranch( | ||||||
651 | *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH, | ||||||
652 | FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT); | ||||||
653 | } | ||||||
654 | |||||||
655 | // Update the dominator tree with the added edge. | ||||||
656 | DT.insertEdge(OldPH, UnswitchedBB); | ||||||
657 | |||||||
658 | // After the dominator tree was updated with the added edge, update MemorySSA | ||||||
659 | // if available. | ||||||
660 | if (MSSAU) { | ||||||
661 | SmallVector<CFGUpdate, 1> Updates; | ||||||
662 | Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB}); | ||||||
663 | MSSAU->applyInsertUpdates(Updates, DT); | ||||||
664 | } | ||||||
665 | |||||||
666 | // Finish updating dominator tree and memory ssa for full unswitch. | ||||||
667 | if (FullUnswitch) { | ||||||
668 | if (MSSAU) { | ||||||
669 | // Remove the cloned branch instruction. | ||||||
670 | ParentBB->getTerminator()->eraseFromParent(); | ||||||
671 | // Create unconditional branch now. | ||||||
672 | BranchInst::Create(ContinueBB, ParentBB); | ||||||
673 | MSSAU->removeEdge(ParentBB, LoopExitBB); | ||||||
674 | } | ||||||
675 | DT.deleteEdge(ParentBB, LoopExitBB); | ||||||
676 | } | ||||||
677 | |||||||
678 | if (MSSAU && VerifyMemorySSA) | ||||||
679 | MSSAU->getMemorySSA()->verifyMemorySSA(); | ||||||
680 | |||||||
681 | // Rewrite the relevant PHI nodes. | ||||||
682 | if (UnswitchedBB == LoopExitBB) | ||||||
683 | rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH); | ||||||
684 | else | ||||||
685 | rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB, | ||||||
686 | *ParentBB, *OldPH, FullUnswitch); | ||||||
687 | |||||||
688 | // The constant we can replace all of our invariants with inside the loop | ||||||
689 | // body. If any of the invariants have a value other than this the loop won't | ||||||
690 | // be entered. | ||||||
691 | ConstantInt *Replacement = ExitDirection | ||||||
692 | ? ConstantInt::getFalse(BI.getContext()) | ||||||
693 | : ConstantInt::getTrue(BI.getContext()); | ||||||
694 | |||||||
695 | // Since this is an i1 condition we can also trivially replace uses of it | ||||||
696 | // within the loop with a constant. | ||||||
697 | for (Value *Invariant : Invariants) | ||||||
698 | replaceLoopInvariantUses(L, Invariant, *Replacement); | ||||||
699 | |||||||
700 | // If this was full unswitching, we may have changed the nesting relationship | ||||||
701 | // for this loop so hoist it to its correct parent if needed. | ||||||
702 | if (FullUnswitch) | ||||||
703 | hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE); | ||||||
704 | |||||||
705 | if (MSSAU && VerifyMemorySSA) | ||||||
706 | MSSAU->getMemorySSA()->verifyMemorySSA(); | ||||||
707 | |||||||
708 | LLVM_DEBUG(dbgs() << " done: unswitching trivial branch...\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << " done: unswitching trivial branch...\n" ; } } while (false); | ||||||
709 | ++NumTrivial; | ||||||
710 | ++NumBranches; | ||||||
711 | return true; | ||||||
712 | } | ||||||
713 | |||||||
714 | /// Unswitch a trivial switch if the condition is loop invariant. | ||||||
715 | /// | ||||||
716 | /// This routine should only be called when loop code leading to the switch has | ||||||
717 | /// been validated as trivial (no side effects). This routine checks if the | ||||||
718 | /// condition is invariant and that at least one of the successors is a loop | ||||||
719 | /// exit. This allows us to unswitch without duplicating the loop, making it | ||||||
720 | /// trivial. | ||||||
721 | /// | ||||||
722 | /// If this routine fails to unswitch the switch it returns false. | ||||||
723 | /// | ||||||
724 | /// If the switch can be unswitched, this routine splits the preheader and | ||||||
725 | /// copies the switch above that split. If the default case is one of the | ||||||
726 | /// exiting cases, it copies the non-exiting cases and points them at the new | ||||||
727 | /// preheader. If the default case is not exiting, it copies the exiting cases | ||||||
728 | /// and points the default at the preheader. It preserves loop simplified form | ||||||
729 | /// (splitting the exit blocks as necessary). It simplifies the switch within | ||||||
730 | /// the loop by removing now-dead cases. If the default case is one of those | ||||||
731 | /// unswitched, it replaces its destination with a new basic block containing | ||||||
732 | /// only unreachable. Such basic blocks, while technically loop exits, are not | ||||||
733 | /// considered for unswitching so this is a stable transform and the same | ||||||
734 | /// switch will not be revisited. If after unswitching there is only a single | ||||||
735 | /// in-loop successor, the switch is further simplified to an unconditional | ||||||
736 | /// branch. Still more cleanup can be done with some simplifycfg like pass. | ||||||
737 | /// | ||||||
738 | /// If `SE` is not null, it will be updated based on the potential loop SCEVs | ||||||
739 | /// invalidated by this. | ||||||
740 | static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, | ||||||
741 | LoopInfo &LI, ScalarEvolution *SE, | ||||||
742 | MemorySSAUpdater *MSSAU) { | ||||||
743 | LLVM_DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << " Trying to unswitch switch: " << SI << "\n"; } } while (false); | ||||||
744 | Value *LoopCond = SI.getCondition(); | ||||||
745 | |||||||
746 | // If this isn't switching on an invariant condition, we can't unswitch it. | ||||||
747 | if (!L.isLoopInvariant(LoopCond)) | ||||||
748 | return false; | ||||||
749 | |||||||
750 | auto *ParentBB = SI.getParent(); | ||||||
751 | |||||||
752 | // The same check must be used both for the default and the exit cases. We | ||||||
753 | // should never leave edges from the switch instruction to a basic block that | ||||||
754 | // we are unswitching, hence the condition used to determine the default case | ||||||
755 | // needs to also be used to populate ExitCaseIndices, which is then used to | ||||||
756 | // remove cases from the switch. | ||||||
757 | auto IsTriviallyUnswitchableExitBlock = [&](BasicBlock &BBToCheck) { | ||||||
758 | // BBToCheck is not an exit block if it is inside loop L. | ||||||
759 | if (L.contains(&BBToCheck)) | ||||||
760 | return false; | ||||||
761 | // BBToCheck is not trivial to unswitch if its phis aren't loop invariant. | ||||||
762 | if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, BBToCheck)) | ||||||
763 | return false; | ||||||
764 | // We do not unswitch a block that only has an unreachable statement, as | ||||||
765 | // it's possible this is a previously unswitched block. Only unswitch if | ||||||
766 | // either the terminator is not unreachable, or, if it is, it's not the only | ||||||
767 | // instruction in the block. | ||||||
768 | auto *TI = BBToCheck.getTerminator(); | ||||||
769 | bool isUnreachable = isa<UnreachableInst>(TI); | ||||||
770 | return !isUnreachable || | ||||||
771 | (isUnreachable && (BBToCheck.getFirstNonPHIOrDbg() != TI)); | ||||||
772 | }; | ||||||
773 | |||||||
774 | SmallVector<int, 4> ExitCaseIndices; | ||||||
775 | for (auto Case : SI.cases()) | ||||||
776 | if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor())) | ||||||
777 | ExitCaseIndices.push_back(Case.getCaseIndex()); | ||||||
778 | BasicBlock *DefaultExitBB = nullptr; | ||||||
779 | SwitchInstProfUpdateWrapper::CaseWeightOpt DefaultCaseWeight = | ||||||
780 | SwitchInstProfUpdateWrapper::getSuccessorWeight(SI, 0); | ||||||
781 | if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) { | ||||||
782 | DefaultExitBB = SI.getDefaultDest(); | ||||||
783 | } else if (ExitCaseIndices.empty()) | ||||||
784 | return false; | ||||||
785 | |||||||
786 | LLVM_DEBUG(dbgs() << " unswitching trivial switch...\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << " unswitching trivial switch...\n" ; } } while (false); | ||||||
787 | |||||||
788 | if (MSSAU && VerifyMemorySSA) | ||||||
789 | MSSAU->getMemorySSA()->verifyMemorySSA(); | ||||||
790 | |||||||
791 | // We may need to invalidate SCEVs for the outermost loop reached by any of | ||||||
792 | // the exits. | ||||||
793 | Loop *OuterL = &L; | ||||||
794 | |||||||
795 | if (DefaultExitBB) { | ||||||
796 | // Check the loop containing this exit. | ||||||
797 | Loop *ExitL = getTopMostExitingLoop(DefaultExitBB, LI); | ||||||
798 | if (!ExitL || ExitL->contains(OuterL)) | ||||||
799 | OuterL = ExitL; | ||||||
800 | } | ||||||
801 | for (unsigned Index : ExitCaseIndices) { | ||||||
802 | auto CaseI = SI.case_begin() + Index; | ||||||
803 | // Compute the outer loop from this exit. | ||||||
804 | Loop *ExitL = getTopMostExitingLoop(CaseI->getCaseSuccessor(), LI); | ||||||
805 | if (!ExitL || ExitL->contains(OuterL)) | ||||||
806 | OuterL = ExitL; | ||||||
807 | } | ||||||
808 | |||||||
809 | if (SE) { | ||||||
810 | if (OuterL) | ||||||
811 | SE->forgetLoop(OuterL); | ||||||
812 | else | ||||||
813 | SE->forgetTopmostLoop(&L); | ||||||
814 | } | ||||||
815 | |||||||
816 | if (DefaultExitBB) { | ||||||
817 | // Clear out the default destination temporarily to allow accurate | ||||||
818 | // predecessor lists to be examined below. | ||||||
819 | SI.setDefaultDest(nullptr); | ||||||
820 | } | ||||||
821 | |||||||
822 | // Store the exit cases into a separate data structure and remove them from | ||||||
823 | // the switch. | ||||||
824 | SmallVector<std::tuple<ConstantInt *, BasicBlock *, | ||||||
825 | SwitchInstProfUpdateWrapper::CaseWeightOpt>, | ||||||
826 | 4> ExitCases; | ||||||
827 | ExitCases.reserve(ExitCaseIndices.size()); | ||||||
828 | SwitchInstProfUpdateWrapper SIW(SI); | ||||||
829 | // We walk the case indices backwards so that we remove the last case first | ||||||
830 | // and don't disrupt the earlier indices. | ||||||
831 | for (unsigned Index : reverse(ExitCaseIndices)) { | ||||||
832 | auto CaseI = SI.case_begin() + Index; | ||||||
833 | // Save the value of this case. | ||||||
834 | auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex()); | ||||||
835 | ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W); | ||||||
836 | // Delete the unswitched cases. | ||||||
837 | SIW.removeCase(CaseI); | ||||||
838 | } | ||||||
839 | |||||||
840 | // Check if after this all of the remaining cases point at the same | ||||||
841 | // successor. | ||||||
842 | BasicBlock *CommonSuccBB = nullptr; | ||||||
843 | if (SI.getNumCases() > 0 && | ||||||
844 | all_of(drop_begin(SI.cases()), [&SI](const SwitchInst::CaseHandle &Case) { | ||||||
845 | return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor(); | ||||||
846 | })) | ||||||
847 | CommonSuccBB = SI.case_begin()->getCaseSuccessor(); | ||||||
848 | if (!DefaultExitBB) { | ||||||
849 | // If we're not unswitching the default, we need it to match any cases to | ||||||
850 | // have a common successor or if we have no cases it is the common | ||||||
851 | // successor. | ||||||
852 | if (SI.getNumCases() == 0) | ||||||
853 | CommonSuccBB = SI.getDefaultDest(); | ||||||
854 | else if (SI.getDefaultDest() != CommonSuccBB) | ||||||
855 | CommonSuccBB = nullptr; | ||||||
856 | } | ||||||
857 | |||||||
858 | // Split the preheader, so that we know that there is a safe place to insert | ||||||
859 | // the switch. | ||||||
860 | BasicBlock *OldPH = L.getLoopPreheader(); | ||||||
861 | BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU); | ||||||
862 | OldPH->getTerminator()->eraseFromParent(); | ||||||
863 | |||||||
864 | // Now add the unswitched switch. | ||||||
865 | auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH); | ||||||
866 | SwitchInstProfUpdateWrapper NewSIW(*NewSI); | ||||||
867 | |||||||
868 | // Rewrite the IR for the unswitched basic blocks. This requires two steps. | ||||||
869 | // First, we split any exit blocks with remaining in-loop predecessors. Then | ||||||
870 | // we update the PHIs in one of two ways depending on if there was a split. | ||||||
871 | // We walk in reverse so that we split in the same order as the cases | ||||||
872 | // appeared. This is purely for convenience of reading the resulting IR, but | ||||||
873 | // it doesn't cost anything really. | ||||||
874 | SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs; | ||||||
875 | SmallDenseMap<BasicBlock *, BasicBlock *, 2> SplitExitBBMap; | ||||||
876 | // Handle the default exit if necessary. | ||||||
877 | // FIXME: It'd be great if we could merge this with the loop below but LLVM's | ||||||
878 | // ranges aren't quite powerful enough yet. | ||||||
879 | if (DefaultExitBB) { | ||||||
880 | if (pred_empty(DefaultExitBB)) { | ||||||
881 | UnswitchedExitBBs.insert(DefaultExitBB); | ||||||
882 | rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH); | ||||||
883 | } else { | ||||||
884 | auto *SplitBB = | ||||||
885 | SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI, MSSAU); | ||||||
886 | rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB, | ||||||
887 | *ParentBB, *OldPH, | ||||||
888 | /*FullUnswitch*/ true); | ||||||
889 | DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB; | ||||||
890 | } | ||||||
891 | } | ||||||
892 | // Note that we must use a reference in the for loop so that we update the | ||||||
893 | // container. | ||||||
894 | for (auto &ExitCase : reverse(ExitCases)) { | ||||||
895 | // Grab a reference to the exit block in the pair so that we can update it. | ||||||
896 | BasicBlock *ExitBB = std::get<1>(ExitCase); | ||||||
897 | |||||||
898 | // If this case is the last edge into the exit block, we can simply reuse it | ||||||
899 | // as it will no longer be a loop exit. No mapping necessary. | ||||||
900 | if (pred_empty(ExitBB)) { | ||||||
901 | // Only rewrite once. | ||||||
902 | if (UnswitchedExitBBs.insert(ExitBB).second) | ||||||
903 | rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH); | ||||||
904 | continue; | ||||||
905 | } | ||||||
906 | |||||||
907 | // Otherwise we need to split the exit block so that we retain an exit | ||||||
908 | // block from the loop and a target for the unswitched condition. | ||||||
909 | BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB]; | ||||||
910 | if (!SplitExitBB) { | ||||||
911 | // If this is the first time we see this, do the split and remember it. | ||||||
912 | SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU); | ||||||
913 | rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB, | ||||||
914 | *ParentBB, *OldPH, | ||||||
915 | /*FullUnswitch*/ true); | ||||||
916 | } | ||||||
917 | // Update the case pair to point to the split block. | ||||||
918 | std::get<1>(ExitCase) = SplitExitBB; | ||||||
919 | } | ||||||
920 | |||||||
921 | // Now add the unswitched cases. We do this in reverse order as we built them | ||||||
922 | // in reverse order. | ||||||
923 | for (auto &ExitCase : reverse(ExitCases)) { | ||||||
924 | ConstantInt *CaseVal = std::get<0>(ExitCase); | ||||||
925 | BasicBlock *UnswitchedBB = std::get<1>(ExitCase); | ||||||
926 | |||||||
927 | NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase)); | ||||||
928 | } | ||||||
929 | |||||||
930 | // If the default was unswitched, re-point it and add explicit cases for | ||||||
931 | // entering the loop. | ||||||
932 | if (DefaultExitBB) { | ||||||
933 | NewSIW->setDefaultDest(DefaultExitBB); | ||||||
934 | NewSIW.setSuccessorWeight(0, DefaultCaseWeight); | ||||||
935 | |||||||
936 | // We removed all the exit cases, so we just copy the cases to the | ||||||
937 | // unswitched switch. | ||||||
938 | for (const auto &Case : SI.cases()) | ||||||
939 | NewSIW.addCase(Case.getCaseValue(), NewPH, | ||||||
940 | SIW.getSuccessorWeight(Case.getSuccessorIndex())); | ||||||
941 | } else if (DefaultCaseWeight) { | ||||||
942 | // We have to set branch weight of the default case. | ||||||
943 | uint64_t SW = *DefaultCaseWeight; | ||||||
944 | for (const auto &Case : SI.cases()) { | ||||||
945 | auto W = SIW.getSuccessorWeight(Case.getSuccessorIndex()); | ||||||
946 | assert(W &&(static_cast <bool> (W && "case weight must be defined as default case weight is defined" ) ? void (0) : __assert_fail ("W && \"case weight must be defined as default case weight is defined\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 947, __extension__ __PRETTY_FUNCTION__)) | ||||||
947 | "case weight must be defined as default case weight is defined")(static_cast <bool> (W && "case weight must be defined as default case weight is defined" ) ? void (0) : __assert_fail ("W && \"case weight must be defined as default case weight is defined\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 947, __extension__ __PRETTY_FUNCTION__)); | ||||||
948 | SW += *W; | ||||||
949 | } | ||||||
950 | NewSIW.setSuccessorWeight(0, SW); | ||||||
951 | } | ||||||
952 | |||||||
953 | // If we ended up with a common successor for every path through the switch | ||||||
954 | // after unswitching, rewrite it to an unconditional branch to make it easy | ||||||
955 | // to recognize. Otherwise we potentially have to recognize the default case | ||||||
956 | // pointing at unreachable and other complexity. | ||||||
957 | if (CommonSuccBB) { | ||||||
958 | BasicBlock *BB = SI.getParent(); | ||||||
959 | // We may have had multiple edges to this common successor block, so remove | ||||||
960 | // them as predecessors. We skip the first one, either the default or the | ||||||
961 | // actual first case. | ||||||
962 | bool SkippedFirst = DefaultExitBB == nullptr; | ||||||
963 | for (auto Case : SI.cases()) { | ||||||
964 | assert(Case.getCaseSuccessor() == CommonSuccBB &&(static_cast <bool> (Case.getCaseSuccessor() == CommonSuccBB && "Non-common successor!") ? void (0) : __assert_fail ("Case.getCaseSuccessor() == CommonSuccBB && \"Non-common successor!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 965, __extension__ __PRETTY_FUNCTION__)) | ||||||
965 | "Non-common successor!")(static_cast <bool> (Case.getCaseSuccessor() == CommonSuccBB && "Non-common successor!") ? void (0) : __assert_fail ("Case.getCaseSuccessor() == CommonSuccBB && \"Non-common successor!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 965, __extension__ __PRETTY_FUNCTION__)); | ||||||
966 | (void)Case; | ||||||
967 | if (!SkippedFirst) { | ||||||
968 | SkippedFirst = true; | ||||||
969 | continue; | ||||||
970 | } | ||||||
971 | CommonSuccBB->removePredecessor(BB, | ||||||
972 | /*KeepOneInputPHIs*/ true); | ||||||
973 | } | ||||||
974 | // Now nuke the switch and replace it with a direct branch. | ||||||
975 | SIW.eraseFromParent(); | ||||||
976 | BranchInst::Create(CommonSuccBB, BB); | ||||||
977 | } else if (DefaultExitBB) { | ||||||
978 | assert(SI.getNumCases() > 0 &&(static_cast <bool> (SI.getNumCases() > 0 && "If we had no cases we'd have a common successor!") ? void ( 0) : __assert_fail ("SI.getNumCases() > 0 && \"If we had no cases we'd have a common successor!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 979, __extension__ __PRETTY_FUNCTION__)) | ||||||
979 | "If we had no cases we'd have a common successor!")(static_cast <bool> (SI.getNumCases() > 0 && "If we had no cases we'd have a common successor!") ? void ( 0) : __assert_fail ("SI.getNumCases() > 0 && \"If we had no cases we'd have a common successor!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 979, __extension__ __PRETTY_FUNCTION__)); | ||||||
980 | // Move the last case to the default successor. This is valid as if the | ||||||
981 | // default got unswitched it cannot be reached. This has the advantage of | ||||||
982 | // being simple and keeping the number of edges from this switch to | ||||||
983 | // successors the same, and avoiding any PHI update complexity. | ||||||
984 | auto LastCaseI = std::prev(SI.case_end()); | ||||||
985 | |||||||
986 | SI.setDefaultDest(LastCaseI->getCaseSuccessor()); | ||||||
987 | SIW.setSuccessorWeight( | ||||||
988 | 0, SIW.getSuccessorWeight(LastCaseI->getSuccessorIndex())); | ||||||
989 | SIW.removeCase(LastCaseI); | ||||||
990 | } | ||||||
991 | |||||||
992 | // Walk the unswitched exit blocks and the unswitched split blocks and update | ||||||
993 | // the dominator tree based on the CFG edits. While we are walking unordered | ||||||
994 | // containers here, the API for applyUpdates takes an unordered list of | ||||||
995 | // updates and requires them to not contain duplicates. | ||||||
996 | SmallVector<DominatorTree::UpdateType, 4> DTUpdates; | ||||||
997 | for (auto *UnswitchedExitBB : UnswitchedExitBBs) { | ||||||
998 | DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB}); | ||||||
999 | DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB}); | ||||||
1000 | } | ||||||
1001 | for (auto SplitUnswitchedPair : SplitExitBBMap) { | ||||||
1002 | DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first}); | ||||||
1003 | DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second}); | ||||||
1004 | } | ||||||
1005 | |||||||
1006 | if (MSSAU) { | ||||||
1007 | MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true); | ||||||
1008 | if (VerifyMemorySSA) | ||||||
1009 | MSSAU->getMemorySSA()->verifyMemorySSA(); | ||||||
1010 | } else { | ||||||
1011 | DT.applyUpdates(DTUpdates); | ||||||
1012 | } | ||||||
1013 | |||||||
1014 | assert(DT.verify(DominatorTree::VerificationLevel::Fast))(static_cast <bool> (DT.verify(DominatorTree::VerificationLevel ::Fast)) ? void (0) : __assert_fail ("DT.verify(DominatorTree::VerificationLevel::Fast)" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1014, __extension__ __PRETTY_FUNCTION__)); | ||||||
1015 | |||||||
1016 | // We may have changed the nesting relationship for this loop so hoist it to | ||||||
1017 | // its correct parent if needed. | ||||||
1018 | hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE); | ||||||
1019 | |||||||
1020 | if (MSSAU && VerifyMemorySSA) | ||||||
1021 | MSSAU->getMemorySSA()->verifyMemorySSA(); | ||||||
1022 | |||||||
1023 | ++NumTrivial; | ||||||
1024 | ++NumSwitches; | ||||||
1025 | LLVM_DEBUG(dbgs() << " done: unswitching trivial switch...\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << " done: unswitching trivial switch...\n" ; } } while (false); | ||||||
1026 | return true; | ||||||
1027 | } | ||||||
1028 | |||||||
1029 | /// This routine scans the loop to find a branch or switch which occurs before | ||||||
1030 | /// any side effects occur. These can potentially be unswitched without | ||||||
1031 | /// duplicating the loop. If a branch or switch is successfully unswitched the | ||||||
1032 | /// scanning continues to see if subsequent branches or switches have become | ||||||
1033 | /// trivial. Once all trivial candidates have been unswitched, this routine | ||||||
1034 | /// returns. | ||||||
1035 | /// | ||||||
1036 | /// The return value indicates whether anything was unswitched (and therefore | ||||||
1037 | /// changed). | ||||||
1038 | /// | ||||||
1039 | /// If `SE` is not null, it will be updated based on the potential loop SCEVs | ||||||
1040 | /// invalidated by this. | ||||||
1041 | static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, | ||||||
1042 | LoopInfo &LI, ScalarEvolution *SE, | ||||||
1043 | MemorySSAUpdater *MSSAU) { | ||||||
1044 | bool Changed = false; | ||||||
1045 | |||||||
1046 | // If loop header has only one reachable successor we should keep looking for | ||||||
1047 | // trivial condition candidates in the successor as well. An alternative is | ||||||
1048 | // to constant fold conditions and merge successors into loop header (then we | ||||||
1049 | // only need to check header's terminator). The reason for not doing this in | ||||||
1050 | // LoopUnswitch pass is that it could potentially break LoopPassManager's | ||||||
1051 | // invariants. Folding dead branches could either eliminate the current loop | ||||||
1052 | // or make other loops unreachable. LCSSA form might also not be preserved | ||||||
1053 | // after deleting branches. The following code keeps traversing loop header's | ||||||
1054 | // successors until it finds the trivial condition candidate (condition that | ||||||
1055 | // is not a constant). Since unswitching generates branches with constant | ||||||
1056 | // conditions, this scenario could be very common in practice. | ||||||
1057 | BasicBlock *CurrentBB = L.getHeader(); | ||||||
1058 | SmallPtrSet<BasicBlock *, 8> Visited; | ||||||
1059 | Visited.insert(CurrentBB); | ||||||
1060 | do { | ||||||
1061 | // Check if there are any side-effecting instructions (e.g. stores, calls, | ||||||
1062 | // volatile loads) in the part of the loop that the code *would* execute | ||||||
1063 | // without unswitching. | ||||||
1064 | if (MSSAU) // Possible early exit with MSSA | ||||||
1065 | if (auto *Defs = MSSAU->getMemorySSA()->getBlockDefs(CurrentBB)) | ||||||
1066 | if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end())) | ||||||
1067 | return Changed; | ||||||
1068 | if (llvm::any_of(*CurrentBB, | ||||||
1069 | [](Instruction &I) { return I.mayHaveSideEffects(); })) | ||||||
1070 | return Changed; | ||||||
1071 | |||||||
1072 | Instruction *CurrentTerm = CurrentBB->getTerminator(); | ||||||
1073 | |||||||
1074 | if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) { | ||||||
1075 | // Don't bother trying to unswitch past a switch with a constant | ||||||
1076 | // condition. This should be removed prior to running this pass by | ||||||
1077 | // simplifycfg. | ||||||
1078 | if (isa<Constant>(SI->getCondition())) | ||||||
1079 | return Changed; | ||||||
1080 | |||||||
1081 | if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU)) | ||||||
1082 | // Couldn't unswitch this one so we're done. | ||||||
1083 | return Changed; | ||||||
1084 | |||||||
1085 | // Mark that we managed to unswitch something. | ||||||
1086 | Changed = true; | ||||||
1087 | |||||||
1088 | // If unswitching turned the terminator into an unconditional branch then | ||||||
1089 | // we can continue. The unswitching logic specifically works to fold any | ||||||
1090 | // cases it can into an unconditional branch to make it easier to | ||||||
1091 | // recognize here. | ||||||
1092 | auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator()); | ||||||
1093 | if (!BI || BI->isConditional()) | ||||||
1094 | return Changed; | ||||||
1095 | |||||||
1096 | CurrentBB = BI->getSuccessor(0); | ||||||
1097 | continue; | ||||||
1098 | } | ||||||
1099 | |||||||
1100 | auto *BI = dyn_cast<BranchInst>(CurrentTerm); | ||||||
1101 | if (!BI) | ||||||
1102 | // We do not understand other terminator instructions. | ||||||
1103 | return Changed; | ||||||
1104 | |||||||
1105 | // Don't bother trying to unswitch past an unconditional branch or a branch | ||||||
1106 | // with a constant value. These should be removed by simplifycfg prior to | ||||||
1107 | // running this pass. | ||||||
1108 | if (!BI->isConditional() || | ||||||
1109 | isa<Constant>(skipTrivialSelect(BI->getCondition()))) | ||||||
1110 | return Changed; | ||||||
1111 | |||||||
1112 | // Found a trivial condition candidate: non-foldable conditional branch. If | ||||||
1113 | // we fail to unswitch this, we can't do anything else that is trivial. | ||||||
1114 | if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU)) | ||||||
1115 | return Changed; | ||||||
1116 | |||||||
1117 | // Mark that we managed to unswitch something. | ||||||
1118 | Changed = true; | ||||||
1119 | |||||||
1120 | // If we only unswitched some of the conditions feeding the branch, we won't | ||||||
1121 | // have collapsed it to a single successor. | ||||||
1122 | BI = cast<BranchInst>(CurrentBB->getTerminator()); | ||||||
1123 | if (BI->isConditional()) | ||||||
1124 | return Changed; | ||||||
1125 | |||||||
1126 | // Follow the newly unconditional branch into its successor. | ||||||
1127 | CurrentBB = BI->getSuccessor(0); | ||||||
1128 | |||||||
1129 | // When continuing, if we exit the loop or reach a previous visited block, | ||||||
1130 | // then we can not reach any trivial condition candidates (unfoldable | ||||||
1131 | // branch instructions or switch instructions) and no unswitch can happen. | ||||||
1132 | } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second); | ||||||
1133 | |||||||
1134 | return Changed; | ||||||
1135 | } | ||||||
1136 | |||||||
1137 | /// Build the cloned blocks for an unswitched copy of the given loop. | ||||||
1138 | /// | ||||||
1139 | /// The cloned blocks are inserted before the loop preheader (`LoopPH`) and | ||||||
1140 | /// after the split block (`SplitBB`) that will be used to select between the | ||||||
1141 | /// cloned and original loop. | ||||||
1142 | /// | ||||||
1143 | /// This routine handles cloning all of the necessary loop blocks and exit | ||||||
1144 | /// blocks including rewriting their instructions and the relevant PHI nodes. | ||||||
1145 | /// Any loop blocks or exit blocks which are dominated by a different successor | ||||||
1146 | /// than the one for this clone of the loop blocks can be trivially skipped. We | ||||||
1147 | /// use the `DominatingSucc` map to determine whether a block satisfies that | ||||||
1148 | /// property with a simple map lookup. | ||||||
1149 | /// | ||||||
1150 | /// It also correctly creates the unconditional branch in the cloned | ||||||
1151 | /// unswitched parent block to only point at the unswitched successor. | ||||||
1152 | /// | ||||||
1153 | /// This does not handle most of the necessary updates to `LoopInfo`. Only exit | ||||||
1154 | /// block splitting is correctly reflected in `LoopInfo`, essentially all of | ||||||
1155 | /// the cloned blocks (and their loops) are left without full `LoopInfo` | ||||||
1156 | /// updates. This also doesn't fully update `DominatorTree`. It adds the cloned | ||||||
1157 | /// blocks to them but doesn't create the cloned `DominatorTree` structure and | ||||||
1158 | /// instead the caller must recompute an accurate DT. It *does* correctly | ||||||
1159 | /// update the `AssumptionCache` provided in `AC`. | ||||||
1160 | static BasicBlock *buildClonedLoopBlocks( | ||||||
1161 | Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB, | ||||||
1162 | ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB, | ||||||
1163 | BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, | ||||||
1164 | const SmallDenseMap<BasicBlock *, BasicBlock *, 16> &DominatingSucc, | ||||||
1165 | ValueToValueMapTy &VMap, | ||||||
1166 | SmallVectorImpl<DominatorTree::UpdateType> &DTUpdates, AssumptionCache &AC, | ||||||
1167 | DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, | ||||||
1168 | ScalarEvolution *SE) { | ||||||
1169 | SmallVector<BasicBlock *, 4> NewBlocks; | ||||||
1170 | NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size()); | ||||||
1171 | |||||||
1172 | // We will need to clone a bunch of blocks, wrap up the clone operation in | ||||||
1173 | // a helper. | ||||||
1174 | auto CloneBlock = [&](BasicBlock *OldBB) { | ||||||
1175 | // Clone the basic block and insert it before the new preheader. | ||||||
1176 | BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent()); | ||||||
1177 | NewBB->moveBefore(LoopPH); | ||||||
1178 | |||||||
1179 | // Record this block and the mapping. | ||||||
1180 | NewBlocks.push_back(NewBB); | ||||||
1181 | VMap[OldBB] = NewBB; | ||||||
1182 | |||||||
1183 | return NewBB; | ||||||
1184 | }; | ||||||
1185 | |||||||
1186 | // We skip cloning blocks when they have a dominating succ that is not the | ||||||
1187 | // succ we are cloning for. | ||||||
1188 | auto SkipBlock = [&](BasicBlock *BB) { | ||||||
1189 | auto It = DominatingSucc.find(BB); | ||||||
1190 | return It != DominatingSucc.end() && It->second != UnswitchedSuccBB; | ||||||
1191 | }; | ||||||
1192 | |||||||
1193 | // First, clone the preheader. | ||||||
1194 | auto *ClonedPH = CloneBlock(LoopPH); | ||||||
1195 | |||||||
1196 | // Then clone all the loop blocks, skipping the ones that aren't necessary. | ||||||
1197 | for (auto *LoopBB : L.blocks()) | ||||||
1198 | if (!SkipBlock(LoopBB)) | ||||||
1199 | CloneBlock(LoopBB); | ||||||
1200 | |||||||
1201 | // Split all the loop exit edges so that when we clone the exit blocks, if | ||||||
1202 | // any of the exit blocks are *also* a preheader for some other loop, we | ||||||
1203 | // don't create multiple predecessors entering the loop header. | ||||||
1204 | for (auto *ExitBB : ExitBlocks) { | ||||||
1205 | if (SkipBlock(ExitBB)) | ||||||
1206 | continue; | ||||||
1207 | |||||||
1208 | // When we are going to clone an exit, we don't need to clone all the | ||||||
1209 | // instructions in the exit block and we want to ensure we have an easy | ||||||
1210 | // place to merge the CFG, so split the exit first. This is always safe to | ||||||
1211 | // do because there cannot be any non-loop predecessors of a loop exit in | ||||||
1212 | // loop simplified form. | ||||||
1213 | auto *MergeBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU); | ||||||
1214 | |||||||
1215 | // Rearrange the names to make it easier to write test cases by having the | ||||||
1216 | // exit block carry the suffix rather than the merge block carrying the | ||||||
1217 | // suffix. | ||||||
1218 | MergeBB->takeName(ExitBB); | ||||||
1219 | ExitBB->setName(Twine(MergeBB->getName()) + ".split"); | ||||||
1220 | |||||||
1221 | // Now clone the original exit block. | ||||||
1222 | auto *ClonedExitBB = CloneBlock(ExitBB); | ||||||
1223 | assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&(static_cast <bool> (ClonedExitBB->getTerminator()-> getNumSuccessors() == 1 && "Exit block should have been split to have one successor!" ) ? void (0) : __assert_fail ("ClonedExitBB->getTerminator()->getNumSuccessors() == 1 && \"Exit block should have been split to have one successor!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1224, __extension__ __PRETTY_FUNCTION__)) | ||||||
1224 | "Exit block should have been split to have one successor!")(static_cast <bool> (ClonedExitBB->getTerminator()-> getNumSuccessors() == 1 && "Exit block should have been split to have one successor!" ) ? void (0) : __assert_fail ("ClonedExitBB->getTerminator()->getNumSuccessors() == 1 && \"Exit block should have been split to have one successor!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1224, __extension__ __PRETTY_FUNCTION__)); | ||||||
1225 | assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&(static_cast <bool> (ClonedExitBB->getTerminator()-> getSuccessor(0) == MergeBB && "Cloned exit block has the wrong successor!" ) ? void (0) : __assert_fail ("ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB && \"Cloned exit block has the wrong successor!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1226, __extension__ __PRETTY_FUNCTION__)) | ||||||
1226 | "Cloned exit block has the wrong successor!")(static_cast <bool> (ClonedExitBB->getTerminator()-> getSuccessor(0) == MergeBB && "Cloned exit block has the wrong successor!" ) ? void (0) : __assert_fail ("ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB && \"Cloned exit block has the wrong successor!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1226, __extension__ __PRETTY_FUNCTION__)); | ||||||
1227 | |||||||
1228 | // Remap any cloned instructions and create a merge phi node for them. | ||||||
1229 | for (auto ZippedInsts : llvm::zip_first( | ||||||
1230 | llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())), | ||||||
1231 | llvm::make_range(ClonedExitBB->begin(), | ||||||
1232 | std::prev(ClonedExitBB->end())))) { | ||||||
1233 | Instruction &I = std::get<0>(ZippedInsts); | ||||||
1234 | Instruction &ClonedI = std::get<1>(ZippedInsts); | ||||||
1235 | |||||||
1236 | // The only instructions in the exit block should be PHI nodes and | ||||||
1237 | // potentially a landing pad. | ||||||
1238 | assert((static_cast <bool> ((isa<PHINode>(I) || isa<LandingPadInst >(I) || isa<CatchPadInst>(I)) && "Bad instruction in exit block!" ) ? void (0) : __assert_fail ("(isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) && \"Bad instruction in exit block!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1240, __extension__ __PRETTY_FUNCTION__)) | ||||||
1239 | (isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) &&(static_cast <bool> ((isa<PHINode>(I) || isa<LandingPadInst >(I) || isa<CatchPadInst>(I)) && "Bad instruction in exit block!" ) ? void (0) : __assert_fail ("(isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) && \"Bad instruction in exit block!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1240, __extension__ __PRETTY_FUNCTION__)) | ||||||
1240 | "Bad instruction in exit block!")(static_cast <bool> ((isa<PHINode>(I) || isa<LandingPadInst >(I) || isa<CatchPadInst>(I)) && "Bad instruction in exit block!" ) ? void (0) : __assert_fail ("(isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) && \"Bad instruction in exit block!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1240, __extension__ __PRETTY_FUNCTION__)); | ||||||
1241 | // We should have a value map between the instruction and its clone. | ||||||
1242 | assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!")(static_cast <bool> (VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!") ? void (0) : __assert_fail ("VMap.lookup(&I) == &ClonedI && \"Mismatch in the value map!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1242, __extension__ __PRETTY_FUNCTION__)); | ||||||
1243 | |||||||
1244 | // Forget SCEVs based on exit phis in case SCEV looked through the phi. | ||||||
1245 | if (SE && isa<PHINode>(I)) | ||||||
1246 | SE->forgetValue(&I); | ||||||
1247 | |||||||
1248 | auto *MergePN = | ||||||
1249 | PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi", | ||||||
1250 | &*MergeBB->getFirstInsertionPt()); | ||||||
1251 | I.replaceAllUsesWith(MergePN); | ||||||
1252 | MergePN->addIncoming(&I, ExitBB); | ||||||
1253 | MergePN->addIncoming(&ClonedI, ClonedExitBB); | ||||||
1254 | } | ||||||
1255 | } | ||||||
1256 | |||||||
1257 | // Rewrite the instructions in the cloned blocks to refer to the instructions | ||||||
1258 | // in the cloned blocks. We have to do this as a second pass so that we have | ||||||
1259 | // everything available. Also, we have inserted new instructions which may | ||||||
1260 | // include assume intrinsics, so we update the assumption cache while | ||||||
1261 | // processing this. | ||||||
1262 | for (auto *ClonedBB : NewBlocks) | ||||||
1263 | for (Instruction &I : *ClonedBB) { | ||||||
1264 | RemapInstruction(&I, VMap, | ||||||
1265 | RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); | ||||||
1266 | if (auto *II = dyn_cast<AssumeInst>(&I)) | ||||||
1267 | AC.registerAssumption(II); | ||||||
1268 | } | ||||||
1269 | |||||||
1270 | // Update any PHI nodes in the cloned successors of the skipped blocks to not | ||||||
1271 | // have spurious incoming values. | ||||||
1272 | for (auto *LoopBB : L.blocks()) | ||||||
1273 | if (SkipBlock(LoopBB)) | ||||||
1274 | for (auto *SuccBB : successors(LoopBB)) | ||||||
1275 | if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB))) | ||||||
1276 | for (PHINode &PN : ClonedSuccBB->phis()) | ||||||
1277 | PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false); | ||||||
1278 | |||||||
1279 | // Remove the cloned parent as a predecessor of any successor we ended up | ||||||
1280 | // cloning other than the unswitched one. | ||||||
1281 | auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB)); | ||||||
1282 | for (auto *SuccBB : successors(ParentBB)) { | ||||||
1283 | if (SuccBB == UnswitchedSuccBB) | ||||||
1284 | continue; | ||||||
1285 | |||||||
1286 | auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)); | ||||||
1287 | if (!ClonedSuccBB) | ||||||
1288 | continue; | ||||||
1289 | |||||||
1290 | ClonedSuccBB->removePredecessor(ClonedParentBB, | ||||||
1291 | /*KeepOneInputPHIs*/ true); | ||||||
1292 | } | ||||||
1293 | |||||||
1294 | // Replace the cloned branch with an unconditional branch to the cloned | ||||||
1295 | // unswitched successor. | ||||||
1296 | auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB)); | ||||||
1297 | Instruction *ClonedTerminator = ClonedParentBB->getTerminator(); | ||||||
1298 | // Trivial Simplification. If Terminator is a conditional branch and | ||||||
1299 | // condition becomes dead - erase it. | ||||||
1300 | Value *ClonedConditionToErase = nullptr; | ||||||
1301 | if (auto *BI = dyn_cast<BranchInst>(ClonedTerminator)) | ||||||
1302 | ClonedConditionToErase = BI->getCondition(); | ||||||
1303 | else if (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator)) | ||||||
1304 | ClonedConditionToErase = SI->getCondition(); | ||||||
1305 | |||||||
1306 | ClonedTerminator->eraseFromParent(); | ||||||
1307 | BranchInst::Create(ClonedSuccBB, ClonedParentBB); | ||||||
1308 | |||||||
1309 | if (ClonedConditionToErase) | ||||||
1310 | RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr, | ||||||
1311 | MSSAU); | ||||||
1312 | |||||||
1313 | // If there are duplicate entries in the PHI nodes because of multiple edges | ||||||
1314 | // to the unswitched successor, we need to nuke all but one as we replaced it | ||||||
1315 | // with a direct branch. | ||||||
1316 | for (PHINode &PN : ClonedSuccBB->phis()) { | ||||||
1317 | bool Found = false; | ||||||
1318 | // Loop over the incoming operands backwards so we can easily delete as we | ||||||
1319 | // go without invalidating the index. | ||||||
1320 | for (int i = PN.getNumOperands() - 1; i >= 0; --i) { | ||||||
1321 | if (PN.getIncomingBlock(i) != ClonedParentBB) | ||||||
1322 | continue; | ||||||
1323 | if (!Found) { | ||||||
1324 | Found = true; | ||||||
1325 | continue; | ||||||
1326 | } | ||||||
1327 | PN.removeIncomingValue(i, /*DeletePHIIfEmpty*/ false); | ||||||
1328 | } | ||||||
1329 | } | ||||||
1330 | |||||||
1331 | // Record the domtree updates for the new blocks. | ||||||
1332 | SmallPtrSet<BasicBlock *, 4> SuccSet; | ||||||
1333 | for (auto *ClonedBB : NewBlocks) { | ||||||
1334 | for (auto *SuccBB : successors(ClonedBB)) | ||||||
1335 | if (SuccSet.insert(SuccBB).second) | ||||||
1336 | DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB}); | ||||||
1337 | SuccSet.clear(); | ||||||
1338 | } | ||||||
1339 | |||||||
1340 | return ClonedPH; | ||||||
1341 | } | ||||||
1342 | |||||||
1343 | /// Recursively clone the specified loop and all of its children. | ||||||
1344 | /// | ||||||
1345 | /// The target parent loop for the clone should be provided, or can be null if | ||||||
1346 | /// the clone is a top-level loop. While cloning, all the blocks are mapped | ||||||
1347 | /// with the provided value map. The entire original loop must be present in | ||||||
1348 | /// the value map. The cloned loop is returned. | ||||||
1349 | static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL, | ||||||
1350 | const ValueToValueMapTy &VMap, LoopInfo &LI) { | ||||||
1351 | auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) { | ||||||
1352 | assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!")(static_cast <bool> (ClonedL.getBlocks().empty() && "Must start with an empty loop!") ? void (0) : __assert_fail ("ClonedL.getBlocks().empty() && \"Must start with an empty loop!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1352, __extension__ __PRETTY_FUNCTION__)); | ||||||
1353 | ClonedL.reserveBlocks(OrigL.getNumBlocks()); | ||||||
1354 | for (auto *BB : OrigL.blocks()) { | ||||||
1355 | auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB)); | ||||||
1356 | ClonedL.addBlockEntry(ClonedBB); | ||||||
1357 | if (LI.getLoopFor(BB) == &OrigL) | ||||||
1358 | LI.changeLoopFor(ClonedBB, &ClonedL); | ||||||
1359 | } | ||||||
1360 | }; | ||||||
1361 | |||||||
1362 | // We specially handle the first loop because it may get cloned into | ||||||
1363 | // a different parent and because we most commonly are cloning leaf loops. | ||||||
1364 | Loop *ClonedRootL = LI.AllocateLoop(); | ||||||
1365 | if (RootParentL) | ||||||
1366 | RootParentL->addChildLoop(ClonedRootL); | ||||||
1367 | else | ||||||
1368 | LI.addTopLevelLoop(ClonedRootL); | ||||||
1369 | AddClonedBlocksToLoop(OrigRootL, *ClonedRootL); | ||||||
1370 | |||||||
1371 | if (OrigRootL.isInnermost()) | ||||||
1372 | return ClonedRootL; | ||||||
1373 | |||||||
1374 | // If we have a nest, we can quickly clone the entire loop nest using an | ||||||
1375 | // iterative approach because it is a tree. We keep the cloned parent in the | ||||||
1376 | // data structure to avoid repeatedly querying through a map to find it. | ||||||
1377 | SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone; | ||||||
1378 | // Build up the loops to clone in reverse order as we'll clone them from the | ||||||
1379 | // back. | ||||||
1380 | for (Loop *ChildL : llvm::reverse(OrigRootL)) | ||||||
1381 | LoopsToClone.push_back({ClonedRootL, ChildL}); | ||||||
1382 | do { | ||||||
1383 | Loop *ClonedParentL, *L; | ||||||
1384 | std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val(); | ||||||
1385 | Loop *ClonedL = LI.AllocateLoop(); | ||||||
1386 | ClonedParentL->addChildLoop(ClonedL); | ||||||
1387 | AddClonedBlocksToLoop(*L, *ClonedL); | ||||||
1388 | for (Loop *ChildL : llvm::reverse(*L)) | ||||||
1389 | LoopsToClone.push_back({ClonedL, ChildL}); | ||||||
1390 | } while (!LoopsToClone.empty()); | ||||||
1391 | |||||||
1392 | return ClonedRootL; | ||||||
1393 | } | ||||||
1394 | |||||||
1395 | /// Build the cloned loops of an original loop from unswitching. | ||||||
1396 | /// | ||||||
1397 | /// Because unswitching simplifies the CFG of the loop, this isn't a trivial | ||||||
1398 | /// operation. We need to re-verify that there even is a loop (as the backedge | ||||||
1399 | /// may not have been cloned), and even if there are remaining backedges the | ||||||
1400 | /// backedge set may be different. However, we know that each child loop is | ||||||
1401 | /// undisturbed, we only need to find where to place each child loop within | ||||||
1402 | /// either any parent loop or within a cloned version of the original loop. | ||||||
1403 | /// | ||||||
1404 | /// Because child loops may end up cloned outside of any cloned version of the | ||||||
1405 | /// original loop, multiple cloned sibling loops may be created. All of them | ||||||
1406 | /// are returned so that the newly introduced loop nest roots can be | ||||||
1407 | /// identified. | ||||||
1408 | static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks, | ||||||
1409 | const ValueToValueMapTy &VMap, LoopInfo &LI, | ||||||
1410 | SmallVectorImpl<Loop *> &NonChildClonedLoops) { | ||||||
1411 | Loop *ClonedL = nullptr; | ||||||
1412 | |||||||
1413 | auto *OrigPH = OrigL.getLoopPreheader(); | ||||||
1414 | auto *OrigHeader = OrigL.getHeader(); | ||||||
1415 | |||||||
1416 | auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH)); | ||||||
1417 | auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader)); | ||||||
1418 | |||||||
1419 | // We need to know the loops of the cloned exit blocks to even compute the | ||||||
1420 | // accurate parent loop. If we only clone exits to some parent of the | ||||||
1421 | // original parent, we want to clone into that outer loop. We also keep track | ||||||
1422 | // of the loops that our cloned exit blocks participate in. | ||||||
1423 | Loop *ParentL = nullptr; | ||||||
1424 | SmallVector<BasicBlock *, 4> ClonedExitsInLoops; | ||||||
1425 | SmallDenseMap<BasicBlock *, Loop *, 16> ExitLoopMap; | ||||||
1426 | ClonedExitsInLoops.reserve(ExitBlocks.size()); | ||||||
1427 | for (auto *ExitBB : ExitBlocks) | ||||||
1428 | if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB))) | ||||||
1429 | if (Loop *ExitL = LI.getLoopFor(ExitBB)) { | ||||||
1430 | ExitLoopMap[ClonedExitBB] = ExitL; | ||||||
1431 | ClonedExitsInLoops.push_back(ClonedExitBB); | ||||||
1432 | if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL))) | ||||||
1433 | ParentL = ExitL; | ||||||
1434 | } | ||||||
1435 | assert((!ParentL || ParentL == OrigL.getParentLoop() ||(static_cast <bool> ((!ParentL || ParentL == OrigL.getParentLoop () || ParentL->contains(OrigL.getParentLoop())) && "The computed parent loop should always contain (or be) the parent of " "the original loop.") ? void (0) : __assert_fail ("(!ParentL || ParentL == OrigL.getParentLoop() || ParentL->contains(OrigL.getParentLoop())) && \"The computed parent loop should always contain (or be) the parent of \" \"the original loop.\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1438, __extension__ __PRETTY_FUNCTION__)) | ||||||
1436 | ParentL->contains(OrigL.getParentLoop())) &&(static_cast <bool> ((!ParentL || ParentL == OrigL.getParentLoop () || ParentL->contains(OrigL.getParentLoop())) && "The computed parent loop should always contain (or be) the parent of " "the original loop.") ? void (0) : __assert_fail ("(!ParentL || ParentL == OrigL.getParentLoop() || ParentL->contains(OrigL.getParentLoop())) && \"The computed parent loop should always contain (or be) the parent of \" \"the original loop.\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1438, __extension__ __PRETTY_FUNCTION__)) | ||||||
1437 | "The computed parent loop should always contain (or be) the parent of "(static_cast <bool> ((!ParentL || ParentL == OrigL.getParentLoop () || ParentL->contains(OrigL.getParentLoop())) && "The computed parent loop should always contain (or be) the parent of " "the original loop.") ? void (0) : __assert_fail ("(!ParentL || ParentL == OrigL.getParentLoop() || ParentL->contains(OrigL.getParentLoop())) && \"The computed parent loop should always contain (or be) the parent of \" \"the original loop.\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1438, __extension__ __PRETTY_FUNCTION__)) | ||||||
1438 | "the original loop.")(static_cast <bool> ((!ParentL || ParentL == OrigL.getParentLoop () || ParentL->contains(OrigL.getParentLoop())) && "The computed parent loop should always contain (or be) the parent of " "the original loop.") ? void (0) : __assert_fail ("(!ParentL || ParentL == OrigL.getParentLoop() || ParentL->contains(OrigL.getParentLoop())) && \"The computed parent loop should always contain (or be) the parent of \" \"the original loop.\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1438, __extension__ __PRETTY_FUNCTION__)); | ||||||
1439 | |||||||
1440 | // We build the set of blocks dominated by the cloned header from the set of | ||||||
1441 | // cloned blocks out of the original loop. While not all of these will | ||||||
1442 | // necessarily be in the cloned loop, it is enough to establish that they | ||||||
1443 | // aren't in unreachable cycles, etc. | ||||||
1444 | SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks; | ||||||
1445 | for (auto *BB : OrigL.blocks()) | ||||||
1446 | if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB))) | ||||||
1447 | ClonedLoopBlocks.insert(ClonedBB); | ||||||
1448 | |||||||
1449 | // Rebuild the set of blocks that will end up in the cloned loop. We may have | ||||||
1450 | // skipped cloning some region of this loop which can in turn skip some of | ||||||
1451 | // the backedges so we have to rebuild the blocks in the loop based on the | ||||||
1452 | // backedges that remain after cloning. | ||||||
1453 | SmallVector<BasicBlock *, 16> Worklist; | ||||||
1454 | SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop; | ||||||
1455 | for (auto *Pred : predecessors(ClonedHeader)) { | ||||||
1456 | // The only possible non-loop header predecessor is the preheader because | ||||||
1457 | // we know we cloned the loop in simplified form. | ||||||
1458 | if (Pred == ClonedPH) | ||||||
1459 | continue; | ||||||
1460 | |||||||
1461 | // Because the loop was in simplified form, the only non-loop predecessor | ||||||
1462 | // should be the preheader. | ||||||
1463 | assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "(static_cast <bool> (ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop " "header other than the preheader " "that is not part of the loop!") ? void (0) : __assert_fail ( "ClonedLoopBlocks.count(Pred) && \"Found a predecessor of the loop \" \"header other than the preheader \" \"that is not part of the loop!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1465, __extension__ __PRETTY_FUNCTION__)) | ||||||
1464 | "header other than the preheader "(static_cast <bool> (ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop " "header other than the preheader " "that is not part of the loop!") ? void (0) : __assert_fail ( "ClonedLoopBlocks.count(Pred) && \"Found a predecessor of the loop \" \"header other than the preheader \" \"that is not part of the loop!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1465, __extension__ __PRETTY_FUNCTION__)) | ||||||
1465 | "that is not part of the loop!")(static_cast <bool> (ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop " "header other than the preheader " "that is not part of the loop!") ? void (0) : __assert_fail ( "ClonedLoopBlocks.count(Pred) && \"Found a predecessor of the loop \" \"header other than the preheader \" \"that is not part of the loop!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1465, __extension__ __PRETTY_FUNCTION__)); | ||||||
1466 | |||||||
1467 | // Insert this block into the loop set and on the first visit (and if it | ||||||
1468 | // isn't the header we're currently walking) put it into the worklist to | ||||||
1469 | // recurse through. | ||||||
1470 | if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader) | ||||||
1471 | Worklist.push_back(Pred); | ||||||
1472 | } | ||||||
1473 | |||||||
1474 | // If we had any backedges then there *is* a cloned loop. Put the header into | ||||||
1475 | // the loop set and then walk the worklist backwards to find all the blocks | ||||||
1476 | // that remain within the loop after cloning. | ||||||
1477 | if (!BlocksInClonedLoop.empty()) { | ||||||
1478 | BlocksInClonedLoop.insert(ClonedHeader); | ||||||
1479 | |||||||
1480 | while (!Worklist.empty()) { | ||||||
1481 | BasicBlock *BB = Worklist.pop_back_val(); | ||||||
1482 | assert(BlocksInClonedLoop.count(BB) &&(static_cast <bool> (BlocksInClonedLoop.count(BB) && "Didn't put block into the loop set!") ? void (0) : __assert_fail ("BlocksInClonedLoop.count(BB) && \"Didn't put block into the loop set!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1483, __extension__ __PRETTY_FUNCTION__)) | ||||||
1483 | "Didn't put block into the loop set!")(static_cast <bool> (BlocksInClonedLoop.count(BB) && "Didn't put block into the loop set!") ? void (0) : __assert_fail ("BlocksInClonedLoop.count(BB) && \"Didn't put block into the loop set!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1483, __extension__ __PRETTY_FUNCTION__)); | ||||||
1484 | |||||||
1485 | // Insert any predecessors that are in the possible set into the cloned | ||||||
1486 | // set, and if the insert is successful, add them to the worklist. Note | ||||||
1487 | // that we filter on the blocks that are definitely reachable via the | ||||||
1488 | // backedge to the loop header so we may prune out dead code within the | ||||||
1489 | // cloned loop. | ||||||
1490 | for (auto *Pred : predecessors(BB)) | ||||||
1491 | if (ClonedLoopBlocks.count(Pred) && | ||||||
1492 | BlocksInClonedLoop.insert(Pred).second) | ||||||
1493 | Worklist.push_back(Pred); | ||||||
1494 | } | ||||||
1495 | |||||||
1496 | ClonedL = LI.AllocateLoop(); | ||||||
1497 | if (ParentL) { | ||||||
1498 | ParentL->addBasicBlockToLoop(ClonedPH, LI); | ||||||
1499 | ParentL->addChildLoop(ClonedL); | ||||||
1500 | } else { | ||||||
1501 | LI.addTopLevelLoop(ClonedL); | ||||||
1502 | } | ||||||
1503 | NonChildClonedLoops.push_back(ClonedL); | ||||||
1504 | |||||||
1505 | ClonedL->reserveBlocks(BlocksInClonedLoop.size()); | ||||||
1506 | // We don't want to just add the cloned loop blocks based on how we | ||||||
1507 | // discovered them. The original order of blocks was carefully built in | ||||||
1508 | // a way that doesn't rely on predecessor ordering. Rather than re-invent | ||||||
1509 | // that logic, we just re-walk the original blocks (and those of the child | ||||||
1510 | // loops) and filter them as we add them into the cloned loop. | ||||||
1511 | for (auto *BB : OrigL.blocks()) { | ||||||
1512 | auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)); | ||||||
1513 | if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB)) | ||||||
1514 | continue; | ||||||
1515 | |||||||
1516 | // Directly add the blocks that are only in this loop. | ||||||
1517 | if (LI.getLoopFor(BB) == &OrigL) { | ||||||
1518 | ClonedL->addBasicBlockToLoop(ClonedBB, LI); | ||||||
1519 | continue; | ||||||
1520 | } | ||||||
1521 | |||||||
1522 | // We want to manually add it to this loop and parents. | ||||||
1523 | // Registering it with LoopInfo will happen when we clone the top | ||||||
1524 | // loop for this block. | ||||||
1525 | for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop()) | ||||||
1526 | PL->addBlockEntry(ClonedBB); | ||||||
1527 | } | ||||||
1528 | |||||||
1529 | // Now add each child loop whose header remains within the cloned loop. All | ||||||
1530 | // of the blocks within the loop must satisfy the same constraints as the | ||||||
1531 | // header so once we pass the header checks we can just clone the entire | ||||||
1532 | // child loop nest. | ||||||
1533 | for (Loop *ChildL : OrigL) { | ||||||
1534 | auto *ClonedChildHeader = | ||||||
1535 | cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader())); | ||||||
1536 | if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader)) | ||||||
1537 | continue; | ||||||
1538 | |||||||
1539 | #ifndef NDEBUG | ||||||
1540 | // We should never have a cloned child loop header but fail to have | ||||||
1541 | // all of the blocks for that child loop. | ||||||
1542 | for (auto *ChildLoopBB : ChildL->blocks()) | ||||||
1543 | assert(BlocksInClonedLoop.count((static_cast <bool> (BlocksInClonedLoop.count( cast< BasicBlock>(VMap.lookup(ChildLoopBB))) && "Child cloned loop has a header within the cloned outer " "loop but not all of its blocks!") ? void (0) : __assert_fail ("BlocksInClonedLoop.count( cast<BasicBlock>(VMap.lookup(ChildLoopBB))) && \"Child cloned loop has a header within the cloned outer \" \"loop but not all of its blocks!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1546, __extension__ __PRETTY_FUNCTION__)) | ||||||
1544 | cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&(static_cast <bool> (BlocksInClonedLoop.count( cast< BasicBlock>(VMap.lookup(ChildLoopBB))) && "Child cloned loop has a header within the cloned outer " "loop but not all of its blocks!") ? void (0) : __assert_fail ("BlocksInClonedLoop.count( cast<BasicBlock>(VMap.lookup(ChildLoopBB))) && \"Child cloned loop has a header within the cloned outer \" \"loop but not all of its blocks!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1546, __extension__ __PRETTY_FUNCTION__)) | ||||||
1545 | "Child cloned loop has a header within the cloned outer "(static_cast <bool> (BlocksInClonedLoop.count( cast< BasicBlock>(VMap.lookup(ChildLoopBB))) && "Child cloned loop has a header within the cloned outer " "loop but not all of its blocks!") ? void (0) : __assert_fail ("BlocksInClonedLoop.count( cast<BasicBlock>(VMap.lookup(ChildLoopBB))) && \"Child cloned loop has a header within the cloned outer \" \"loop but not all of its blocks!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1546, __extension__ __PRETTY_FUNCTION__)) | ||||||
1546 | "loop but not all of its blocks!")(static_cast <bool> (BlocksInClonedLoop.count( cast< BasicBlock>(VMap.lookup(ChildLoopBB))) && "Child cloned loop has a header within the cloned outer " "loop but not all of its blocks!") ? void (0) : __assert_fail ("BlocksInClonedLoop.count( cast<BasicBlock>(VMap.lookup(ChildLoopBB))) && \"Child cloned loop has a header within the cloned outer \" \"loop but not all of its blocks!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1546, __extension__ __PRETTY_FUNCTION__)); | ||||||
1547 | #endif | ||||||
1548 | |||||||
1549 | cloneLoopNest(*ChildL, ClonedL, VMap, LI); | ||||||
1550 | } | ||||||
1551 | } | ||||||
1552 | |||||||
1553 | // Now that we've handled all the components of the original loop that were | ||||||
1554 | // cloned into a new loop, we still need to handle anything from the original | ||||||
1555 | // loop that wasn't in a cloned loop. | ||||||
1556 | |||||||
1557 | // Figure out what blocks are left to place within any loop nest containing | ||||||
1558 | // the unswitched loop. If we never formed a loop, the cloned PH is one of | ||||||
1559 | // them. | ||||||
1560 | SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet; | ||||||
1561 | if (BlocksInClonedLoop.empty()) | ||||||
1562 | UnloopedBlockSet.insert(ClonedPH); | ||||||
1563 | for (auto *ClonedBB : ClonedLoopBlocks) | ||||||
1564 | if (!BlocksInClonedLoop.count(ClonedBB)) | ||||||
1565 | UnloopedBlockSet.insert(ClonedBB); | ||||||
1566 | |||||||
1567 | // Copy the cloned exits and sort them in ascending loop depth, we'll work | ||||||
1568 | // backwards across these to process them inside out. The order shouldn't | ||||||
1569 | // matter as we're just trying to build up the map from inside-out; we use | ||||||
1570 | // the map in a more stably ordered way below. | ||||||
1571 | auto OrderedClonedExitsInLoops = ClonedExitsInLoops; | ||||||
1572 | llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) { | ||||||
1573 | return ExitLoopMap.lookup(LHS)->getLoopDepth() < | ||||||
1574 | ExitLoopMap.lookup(RHS)->getLoopDepth(); | ||||||
1575 | }); | ||||||
1576 | |||||||
1577 | // Populate the existing ExitLoopMap with everything reachable from each | ||||||
1578 | // exit, starting from the inner most exit. | ||||||
1579 | while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) { | ||||||
1580 | assert(Worklist.empty() && "Didn't clear worklist!")(static_cast <bool> (Worklist.empty() && "Didn't clear worklist!" ) ? void (0) : __assert_fail ("Worklist.empty() && \"Didn't clear worklist!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1580, __extension__ __PRETTY_FUNCTION__)); | ||||||
1581 | |||||||
1582 | BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val(); | ||||||
1583 | Loop *ExitL = ExitLoopMap.lookup(ExitBB); | ||||||
1584 | |||||||
1585 | // Walk the CFG back until we hit the cloned PH adding everything reachable | ||||||
1586 | // and in the unlooped set to this exit block's loop. | ||||||
1587 | Worklist.push_back(ExitBB); | ||||||
1588 | do { | ||||||
1589 | BasicBlock *BB = Worklist.pop_back_val(); | ||||||
1590 | // We can stop recursing at the cloned preheader (if we get there). | ||||||
1591 | if (BB == ClonedPH) | ||||||
1592 | continue; | ||||||
1593 | |||||||
1594 | for (BasicBlock *PredBB : predecessors(BB)) { | ||||||
1595 | // If this pred has already been moved to our set or is part of some | ||||||
1596 | // (inner) loop, no update needed. | ||||||
1597 | if (!UnloopedBlockSet.erase(PredBB)) { | ||||||
1598 | assert((static_cast <bool> ((BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) && "Predecessor not mapped to a loop!" ) ? void (0) : __assert_fail ("(BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) && \"Predecessor not mapped to a loop!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1600, __extension__ __PRETTY_FUNCTION__)) | ||||||
1599 | (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&(static_cast <bool> ((BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) && "Predecessor not mapped to a loop!" ) ? void (0) : __assert_fail ("(BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) && \"Predecessor not mapped to a loop!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1600, __extension__ __PRETTY_FUNCTION__)) | ||||||
1600 | "Predecessor not mapped to a loop!")(static_cast <bool> ((BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) && "Predecessor not mapped to a loop!" ) ? void (0) : __assert_fail ("(BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) && \"Predecessor not mapped to a loop!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1600, __extension__ __PRETTY_FUNCTION__)); | ||||||
1601 | continue; | ||||||
1602 | } | ||||||
1603 | |||||||
1604 | // We just insert into the loop set here. We'll add these blocks to the | ||||||
1605 | // exit loop after we build up the set in an order that doesn't rely on | ||||||
1606 | // predecessor order (which in turn relies on use list order). | ||||||
1607 | bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second; | ||||||
1608 | (void)Inserted; | ||||||
1609 | assert(Inserted && "Should only visit an unlooped block once!")(static_cast <bool> (Inserted && "Should only visit an unlooped block once!" ) ? void (0) : __assert_fail ("Inserted && \"Should only visit an unlooped block once!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1609, __extension__ __PRETTY_FUNCTION__)); | ||||||
1610 | |||||||
1611 | // And recurse through to its predecessors. | ||||||
1612 | Worklist.push_back(PredBB); | ||||||
1613 | } | ||||||
1614 | } while (!Worklist.empty()); | ||||||
1615 | } | ||||||
1616 | |||||||
1617 | // Now that the ExitLoopMap gives a mapping for all the non-looping cloned | ||||||
1618 | // blocks to their outer loops, walk the cloned blocks and the cloned exits | ||||||
1619 | // in their original order adding them to the correct loop. | ||||||
1620 | |||||||
1621 | // We need a stable insertion order. We use the order of the original loop | ||||||
1622 | // order and map into the correct parent loop. | ||||||
1623 | for (auto *BB : llvm::concat<BasicBlock *const>( | ||||||
1624 | ArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops)) | ||||||
1625 | if (Loop *OuterL = ExitLoopMap.lookup(BB)) | ||||||
1626 | OuterL->addBasicBlockToLoop(BB, LI); | ||||||
1627 | |||||||
1628 | #ifndef NDEBUG | ||||||
1629 | for (auto &BBAndL : ExitLoopMap) { | ||||||
1630 | auto *BB = BBAndL.first; | ||||||
1631 | auto *OuterL = BBAndL.second; | ||||||
1632 | assert(LI.getLoopFor(BB) == OuterL &&(static_cast <bool> (LI.getLoopFor(BB) == OuterL && "Failed to put all blocks into outer loops!") ? void (0) : __assert_fail ("LI.getLoopFor(BB) == OuterL && \"Failed to put all blocks into outer loops!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1633, __extension__ __PRETTY_FUNCTION__)) | ||||||
1633 | "Failed to put all blocks into outer loops!")(static_cast <bool> (LI.getLoopFor(BB) == OuterL && "Failed to put all blocks into outer loops!") ? void (0) : __assert_fail ("LI.getLoopFor(BB) == OuterL && \"Failed to put all blocks into outer loops!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1633, __extension__ __PRETTY_FUNCTION__)); | ||||||
1634 | } | ||||||
1635 | #endif | ||||||
1636 | |||||||
1637 | // Now that all the blocks are placed into the correct containing loop in the | ||||||
1638 | // absence of child loops, find all the potentially cloned child loops and | ||||||
1639 | // clone them into whatever outer loop we placed their header into. | ||||||
1640 | for (Loop *ChildL : OrigL) { | ||||||
1641 | auto *ClonedChildHeader = | ||||||
1642 | cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader())); | ||||||
1643 | if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader)) | ||||||
1644 | continue; | ||||||
1645 | |||||||
1646 | #ifndef NDEBUG | ||||||
1647 | for (auto *ChildLoopBB : ChildL->blocks()) | ||||||
1648 | assert(VMap.count(ChildLoopBB) &&(static_cast <bool> (VMap.count(ChildLoopBB) && "Cloned a child loop header but not all of that loops blocks!" ) ? void (0) : __assert_fail ("VMap.count(ChildLoopBB) && \"Cloned a child loop header but not all of that loops blocks!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1649, __extension__ __PRETTY_FUNCTION__)) | ||||||
1649 | "Cloned a child loop header but not all of that loops blocks!")(static_cast <bool> (VMap.count(ChildLoopBB) && "Cloned a child loop header but not all of that loops blocks!" ) ? void (0) : __assert_fail ("VMap.count(ChildLoopBB) && \"Cloned a child loop header but not all of that loops blocks!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1649, __extension__ __PRETTY_FUNCTION__)); | ||||||
1650 | #endif | ||||||
1651 | |||||||
1652 | NonChildClonedLoops.push_back(cloneLoopNest( | ||||||
1653 | *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI)); | ||||||
1654 | } | ||||||
1655 | } | ||||||
1656 | |||||||
1657 | static void | ||||||
1658 | deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks, | ||||||
1659 | ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps, | ||||||
1660 | DominatorTree &DT, MemorySSAUpdater *MSSAU) { | ||||||
1661 | // Find all the dead clones, and remove them from their successors. | ||||||
1662 | SmallVector<BasicBlock *, 16> DeadBlocks; | ||||||
1663 | for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks)) | ||||||
1664 | for (const auto &VMap : VMaps) | ||||||
1665 | if (BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB))) | ||||||
1666 | if (!DT.isReachableFromEntry(ClonedBB)) { | ||||||
1667 | for (BasicBlock *SuccBB : successors(ClonedBB)) | ||||||
1668 | SuccBB->removePredecessor(ClonedBB); | ||||||
1669 | DeadBlocks.push_back(ClonedBB); | ||||||
1670 | } | ||||||
1671 | |||||||
1672 | // Remove all MemorySSA in the dead blocks | ||||||
1673 | if (MSSAU) { | ||||||
1674 | SmallSetVector<BasicBlock *, 8> DeadBlockSet(DeadBlocks.begin(), | ||||||
1675 | DeadBlocks.end()); | ||||||
1676 | MSSAU->removeBlocks(DeadBlockSet); | ||||||
1677 | } | ||||||
1678 | |||||||
1679 | // Drop any remaining references to break cycles. | ||||||
1680 | for (BasicBlock *BB : DeadBlocks) | ||||||
1681 | BB->dropAllReferences(); | ||||||
1682 | // Erase them from the IR. | ||||||
1683 | for (BasicBlock *BB : DeadBlocks) | ||||||
1684 | BB->eraseFromParent(); | ||||||
1685 | } | ||||||
1686 | |||||||
1687 | static void | ||||||
1688 | deleteDeadBlocksFromLoop(Loop &L, | ||||||
1689 | SmallVectorImpl<BasicBlock *> &ExitBlocks, | ||||||
1690 | DominatorTree &DT, LoopInfo &LI, | ||||||
1691 | MemorySSAUpdater *MSSAU, | ||||||
1692 | ScalarEvolution *SE, | ||||||
1693 | function_ref<void(Loop &, StringRef)> DestroyLoopCB) { | ||||||
1694 | // Find all the dead blocks tied to this loop, and remove them from their | ||||||
1695 | // successors. | ||||||
1696 | SmallSetVector<BasicBlock *, 8> DeadBlockSet; | ||||||
1697 | |||||||
1698 | // Start with loop/exit blocks and get a transitive closure of reachable dead | ||||||
1699 | // blocks. | ||||||
1700 | SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(), | ||||||
1701 | ExitBlocks.end()); | ||||||
1702 | DeathCandidates.append(L.blocks().begin(), L.blocks().end()); | ||||||
1703 | while (!DeathCandidates.empty()) { | ||||||
1704 | auto *BB = DeathCandidates.pop_back_val(); | ||||||
1705 | if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) { | ||||||
1706 | for (BasicBlock *SuccBB : successors(BB)) { | ||||||
1707 | SuccBB->removePredecessor(BB); | ||||||
1708 | DeathCandidates.push_back(SuccBB); | ||||||
1709 | } | ||||||
1710 | DeadBlockSet.insert(BB); | ||||||
1711 | } | ||||||
1712 | } | ||||||
1713 | |||||||
1714 | // Remove all MemorySSA in the dead blocks | ||||||
1715 | if (MSSAU) | ||||||
1716 | MSSAU->removeBlocks(DeadBlockSet); | ||||||
1717 | |||||||
1718 | // Filter out the dead blocks from the exit blocks list so that it can be | ||||||
1719 | // used in the caller. | ||||||
1720 | llvm::erase_if(ExitBlocks, | ||||||
1721 | [&](BasicBlock *BB) { return DeadBlockSet.count(BB); }); | ||||||
1722 | |||||||
1723 | // Walk from this loop up through its parents removing all of the dead blocks. | ||||||
1724 | for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) { | ||||||
1725 | for (auto *BB : DeadBlockSet) | ||||||
1726 | ParentL->getBlocksSet().erase(BB); | ||||||
1727 | llvm::erase_if(ParentL->getBlocksVector(), | ||||||
1728 | [&](BasicBlock *BB) { return DeadBlockSet.count(BB); }); | ||||||
1729 | } | ||||||
1730 | |||||||
1731 | // Now delete the dead child loops. This raw delete will clear them | ||||||
1732 | // recursively. | ||||||
1733 | llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) { | ||||||
1734 | if (!DeadBlockSet.count(ChildL->getHeader())) | ||||||
1735 | return false; | ||||||
1736 | |||||||
1737 | assert(llvm::all_of(ChildL->blocks(),(static_cast <bool> (llvm::all_of(ChildL->blocks(), [ &](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB ); }) && "If the child loop header is dead all blocks in the child loop must " "be dead as well!") ? void (0) : __assert_fail ("llvm::all_of(ChildL->blocks(), [&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB); }) && \"If the child loop header is dead all blocks in the child loop must \" \"be dead as well!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1742, __extension__ __PRETTY_FUNCTION__)) | ||||||
1738 | [&](BasicBlock *ChildBB) {(static_cast <bool> (llvm::all_of(ChildL->blocks(), [ &](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB ); }) && "If the child loop header is dead all blocks in the child loop must " "be dead as well!") ? void (0) : __assert_fail ("llvm::all_of(ChildL->blocks(), [&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB); }) && \"If the child loop header is dead all blocks in the child loop must \" \"be dead as well!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1742, __extension__ __PRETTY_FUNCTION__)) | ||||||
1739 | return DeadBlockSet.count(ChildBB);(static_cast <bool> (llvm::all_of(ChildL->blocks(), [ &](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB ); }) && "If the child loop header is dead all blocks in the child loop must " "be dead as well!") ? void (0) : __assert_fail ("llvm::all_of(ChildL->blocks(), [&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB); }) && \"If the child loop header is dead all blocks in the child loop must \" \"be dead as well!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1742, __extension__ __PRETTY_FUNCTION__)) | ||||||
1740 | }) &&(static_cast <bool> (llvm::all_of(ChildL->blocks(), [ &](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB ); }) && "If the child loop header is dead all blocks in the child loop must " "be dead as well!") ? void (0) : __assert_fail ("llvm::all_of(ChildL->blocks(), [&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB); }) && \"If the child loop header is dead all blocks in the child loop must \" \"be dead as well!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1742, __extension__ __PRETTY_FUNCTION__)) | ||||||
1741 | "If the child loop header is dead all blocks in the child loop must "(static_cast <bool> (llvm::all_of(ChildL->blocks(), [ &](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB ); }) && "If the child loop header is dead all blocks in the child loop must " "be dead as well!") ? void (0) : __assert_fail ("llvm::all_of(ChildL->blocks(), [&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB); }) && \"If the child loop header is dead all blocks in the child loop must \" \"be dead as well!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1742, __extension__ __PRETTY_FUNCTION__)) | ||||||
1742 | "be dead as well!")(static_cast <bool> (llvm::all_of(ChildL->blocks(), [ &](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB ); }) && "If the child loop header is dead all blocks in the child loop must " "be dead as well!") ? void (0) : __assert_fail ("llvm::all_of(ChildL->blocks(), [&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB); }) && \"If the child loop header is dead all blocks in the child loop must \" \"be dead as well!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1742, __extension__ __PRETTY_FUNCTION__)); | ||||||
1743 | DestroyLoopCB(*ChildL, ChildL->getName()); | ||||||
1744 | if (SE) | ||||||
1745 | SE->forgetBlockAndLoopDispositions(); | ||||||
1746 | LI.destroy(ChildL); | ||||||
1747 | return true; | ||||||
1748 | }); | ||||||
1749 | |||||||
1750 | // Remove the loop mappings for the dead blocks and drop all the references | ||||||
1751 | // from these blocks to others to handle cyclic references as we start | ||||||
1752 | // deleting the blocks themselves. | ||||||
1753 | for (auto *BB : DeadBlockSet) { | ||||||
1754 | // Check that the dominator tree has already been updated. | ||||||
1755 | assert(!DT.getNode(BB) && "Should already have cleared domtree!")(static_cast <bool> (!DT.getNode(BB) && "Should already have cleared domtree!" ) ? void (0) : __assert_fail ("!DT.getNode(BB) && \"Should already have cleared domtree!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1755, __extension__ __PRETTY_FUNCTION__)); | ||||||
1756 | LI.changeLoopFor(BB, nullptr); | ||||||
1757 | // Drop all uses of the instructions to make sure we won't have dangling | ||||||
1758 | // uses in other blocks. | ||||||
1759 | for (auto &I : *BB) | ||||||
1760 | if (!I.use_empty()) | ||||||
1761 | I.replaceAllUsesWith(PoisonValue::get(I.getType())); | ||||||
1762 | BB->dropAllReferences(); | ||||||
1763 | } | ||||||
1764 | |||||||
1765 | // Actually delete the blocks now that they've been fully unhooked from the | ||||||
1766 | // IR. | ||||||
1767 | for (auto *BB : DeadBlockSet) | ||||||
1768 | BB->eraseFromParent(); | ||||||
1769 | } | ||||||
1770 | |||||||
1771 | /// Recompute the set of blocks in a loop after unswitching. | ||||||
1772 | /// | ||||||
1773 | /// This walks from the original header's predecessors to rebuild the loop. We | ||||||
1774 | /// take advantage of the fact that new blocks can't have been added, and so we | ||||||
1775 | /// filter by the original loop's blocks. This also handles potentially | ||||||
1776 | /// unreachable code that we don't want to explore but might be found examining | ||||||
1777 | /// the predecessors of the header. | ||||||
1778 | /// | ||||||
1779 | /// If the original loop is no longer a loop, this will return an empty set. If | ||||||
1780 | /// it remains a loop, all the blocks within it will be added to the set | ||||||
1781 | /// (including those blocks in inner loops). | ||||||
1782 | static SmallPtrSet<const BasicBlock *, 16> recomputeLoopBlockSet(Loop &L, | ||||||
1783 | LoopInfo &LI) { | ||||||
1784 | SmallPtrSet<const BasicBlock *, 16> LoopBlockSet; | ||||||
1785 | |||||||
1786 | auto *PH = L.getLoopPreheader(); | ||||||
1787 | auto *Header = L.getHeader(); | ||||||
1788 | |||||||
1789 | // A worklist to use while walking backwards from the header. | ||||||
1790 | SmallVector<BasicBlock *, 16> Worklist; | ||||||
1791 | |||||||
1792 | // First walk the predecessors of the header to find the backedges. This will | ||||||
1793 | // form the basis of our walk. | ||||||
1794 | for (auto *Pred : predecessors(Header)) { | ||||||
1795 | // Skip the preheader. | ||||||
1796 | if (Pred == PH) | ||||||
1797 | continue; | ||||||
1798 | |||||||
1799 | // Because the loop was in simplified form, the only non-loop predecessor | ||||||
1800 | // is the preheader. | ||||||
1801 | assert(L.contains(Pred) && "Found a predecessor of the loop header other "(static_cast <bool> (L.contains(Pred) && "Found a predecessor of the loop header other " "than the preheader that is not part of the " "loop!") ? void (0) : __assert_fail ("L.contains(Pred) && \"Found a predecessor of the loop header other \" \"than the preheader that is not part of the \" \"loop!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1803, __extension__ __PRETTY_FUNCTION__)) | ||||||
1802 | "than the preheader that is not part of the "(static_cast <bool> (L.contains(Pred) && "Found a predecessor of the loop header other " "than the preheader that is not part of the " "loop!") ? void (0) : __assert_fail ("L.contains(Pred) && \"Found a predecessor of the loop header other \" \"than the preheader that is not part of the \" \"loop!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1803, __extension__ __PRETTY_FUNCTION__)) | ||||||
1803 | "loop!")(static_cast <bool> (L.contains(Pred) && "Found a predecessor of the loop header other " "than the preheader that is not part of the " "loop!") ? void (0) : __assert_fail ("L.contains(Pred) && \"Found a predecessor of the loop header other \" \"than the preheader that is not part of the \" \"loop!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1803, __extension__ __PRETTY_FUNCTION__)); | ||||||
1804 | |||||||
1805 | // Insert this block into the loop set and on the first visit and, if it | ||||||
1806 | // isn't the header we're currently walking, put it into the worklist to | ||||||
1807 | // recurse through. | ||||||
1808 | if (LoopBlockSet.insert(Pred).second && Pred != Header) | ||||||
1809 | Worklist.push_back(Pred); | ||||||
1810 | } | ||||||
1811 | |||||||
1812 | // If no backedges were found, we're done. | ||||||
1813 | if (LoopBlockSet.empty()) | ||||||
1814 | return LoopBlockSet; | ||||||
1815 | |||||||
1816 | // We found backedges, recurse through them to identify the loop blocks. | ||||||
1817 | while (!Worklist.empty()) { | ||||||
1818 | BasicBlock *BB = Worklist.pop_back_val(); | ||||||
1819 | assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!")(static_cast <bool> (LoopBlockSet.count(BB) && "Didn't put block into the loop set!" ) ? void (0) : __assert_fail ("LoopBlockSet.count(BB) && \"Didn't put block into the loop set!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1819, __extension__ __PRETTY_FUNCTION__)); | ||||||
1820 | |||||||
1821 | // No need to walk past the header. | ||||||
1822 | if (BB == Header) | ||||||
1823 | continue; | ||||||
1824 | |||||||
1825 | // Because we know the inner loop structure remains valid we can use the | ||||||
1826 | // loop structure to jump immediately across the entire nested loop. | ||||||
1827 | // Further, because it is in loop simplified form, we can directly jump | ||||||
1828 | // to its preheader afterward. | ||||||
1829 | if (Loop *InnerL = LI.getLoopFor(BB)) | ||||||
1830 | if (InnerL != &L) { | ||||||
1831 | assert(L.contains(InnerL) &&(static_cast <bool> (L.contains(InnerL) && "Should not reach a loop *outside* this loop!" ) ? void (0) : __assert_fail ("L.contains(InnerL) && \"Should not reach a loop *outside* this loop!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1832, __extension__ __PRETTY_FUNCTION__)) | ||||||
1832 | "Should not reach a loop *outside* this loop!")(static_cast <bool> (L.contains(InnerL) && "Should not reach a loop *outside* this loop!" ) ? void (0) : __assert_fail ("L.contains(InnerL) && \"Should not reach a loop *outside* this loop!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1832, __extension__ __PRETTY_FUNCTION__)); | ||||||
1833 | // The preheader is the only possible predecessor of the loop so | ||||||
1834 | // insert it into the set and check whether it was already handled. | ||||||
1835 | auto *InnerPH = InnerL->getLoopPreheader(); | ||||||
1836 | assert(L.contains(InnerPH) && "Cannot contain an inner loop block "(static_cast <bool> (L.contains(InnerPH) && "Cannot contain an inner loop block " "but not contain the inner loop " "preheader!") ? void (0) : __assert_fail ("L.contains(InnerPH) && \"Cannot contain an inner loop block \" \"but not contain the inner loop \" \"preheader!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1838, __extension__ __PRETTY_FUNCTION__)) | ||||||
1837 | "but not contain the inner loop "(static_cast <bool> (L.contains(InnerPH) && "Cannot contain an inner loop block " "but not contain the inner loop " "preheader!") ? void (0) : __assert_fail ("L.contains(InnerPH) && \"Cannot contain an inner loop block \" \"but not contain the inner loop \" \"preheader!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1838, __extension__ __PRETTY_FUNCTION__)) | ||||||
1838 | "preheader!")(static_cast <bool> (L.contains(InnerPH) && "Cannot contain an inner loop block " "but not contain the inner loop " "preheader!") ? void (0) : __assert_fail ("L.contains(InnerPH) && \"Cannot contain an inner loop block \" \"but not contain the inner loop \" \"preheader!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1838, __extension__ __PRETTY_FUNCTION__)); | ||||||
1839 | if (!LoopBlockSet.insert(InnerPH).second) | ||||||
1840 | // The only way to reach the preheader is through the loop body | ||||||
1841 | // itself so if it has been visited the loop is already handled. | ||||||
1842 | continue; | ||||||
1843 | |||||||
1844 | // Insert all of the blocks (other than those already present) into | ||||||
1845 | // the loop set. We expect at least the block that led us to find the | ||||||
1846 | // inner loop to be in the block set, but we may also have other loop | ||||||
1847 | // blocks if they were already enqueued as predecessors of some other | ||||||
1848 | // outer loop block. | ||||||
1849 | for (auto *InnerBB : InnerL->blocks()) { | ||||||
1850 | if (InnerBB == BB) { | ||||||
1851 | assert(LoopBlockSet.count(InnerBB) &&(static_cast <bool> (LoopBlockSet.count(InnerBB) && "Block should already be in the set!") ? void (0) : __assert_fail ("LoopBlockSet.count(InnerBB) && \"Block should already be in the set!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1852, __extension__ __PRETTY_FUNCTION__)) | ||||||
1852 | "Block should already be in the set!")(static_cast <bool> (LoopBlockSet.count(InnerBB) && "Block should already be in the set!") ? void (0) : __assert_fail ("LoopBlockSet.count(InnerBB) && \"Block should already be in the set!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1852, __extension__ __PRETTY_FUNCTION__)); | ||||||
1853 | continue; | ||||||
1854 | } | ||||||
1855 | |||||||
1856 | LoopBlockSet.insert(InnerBB); | ||||||
1857 | } | ||||||
1858 | |||||||
1859 | // Add the preheader to the worklist so we will continue past the | ||||||
1860 | // loop body. | ||||||
1861 | Worklist.push_back(InnerPH); | ||||||
1862 | continue; | ||||||
1863 | } | ||||||
1864 | |||||||
1865 | // Insert any predecessors that were in the original loop into the new | ||||||
1866 | // set, and if the insert is successful, add them to the worklist. | ||||||
1867 | for (auto *Pred : predecessors(BB)) | ||||||
1868 | if (L.contains(Pred) && LoopBlockSet.insert(Pred).second) | ||||||
1869 | Worklist.push_back(Pred); | ||||||
1870 | } | ||||||
1871 | |||||||
1872 | assert(LoopBlockSet.count(Header) && "Cannot fail to add the header!")(static_cast <bool> (LoopBlockSet.count(Header) && "Cannot fail to add the header!") ? void (0) : __assert_fail ("LoopBlockSet.count(Header) && \"Cannot fail to add the header!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1872, __extension__ __PRETTY_FUNCTION__)); | ||||||
1873 | |||||||
1874 | // We've found all the blocks participating in the loop, return our completed | ||||||
1875 | // set. | ||||||
1876 | return LoopBlockSet; | ||||||
1877 | } | ||||||
1878 | |||||||
1879 | /// Rebuild a loop after unswitching removes some subset of blocks and edges. | ||||||
1880 | /// | ||||||
1881 | /// The removal may have removed some child loops entirely but cannot have | ||||||
1882 | /// disturbed any remaining child loops. However, they may need to be hoisted | ||||||
1883 | /// to the parent loop (or to be top-level loops). The original loop may be | ||||||
1884 | /// completely removed. | ||||||
1885 | /// | ||||||
1886 | /// The sibling loops resulting from this update are returned. If the original | ||||||
1887 | /// loop remains a valid loop, it will be the first entry in this list with all | ||||||
1888 | /// of the newly sibling loops following it. | ||||||
1889 | /// | ||||||
1890 | /// Returns true if the loop remains a loop after unswitching, and false if it | ||||||
1891 | /// is no longer a loop after unswitching (and should not continue to be | ||||||
1892 | /// referenced). | ||||||
1893 | static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef<BasicBlock *> ExitBlocks, | ||||||
1894 | LoopInfo &LI, | ||||||
1895 | SmallVectorImpl<Loop *> &HoistedLoops, | ||||||
1896 | ScalarEvolution *SE) { | ||||||
1897 | auto *PH = L.getLoopPreheader(); | ||||||
1898 | |||||||
1899 | // Compute the actual parent loop from the exit blocks. Because we may have | ||||||
1900 | // pruned some exits the loop may be different from the original parent. | ||||||
1901 | Loop *ParentL = nullptr; | ||||||
1902 | SmallVector<Loop *, 4> ExitLoops; | ||||||
1903 | SmallVector<BasicBlock *, 4> ExitsInLoops; | ||||||
1904 | ExitsInLoops.reserve(ExitBlocks.size()); | ||||||
1905 | for (auto *ExitBB : ExitBlocks) | ||||||
1906 | if (Loop *ExitL = LI.getLoopFor(ExitBB)) { | ||||||
1907 | ExitLoops.push_back(ExitL); | ||||||
1908 | ExitsInLoops.push_back(ExitBB); | ||||||
1909 | if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL))) | ||||||
1910 | ParentL = ExitL; | ||||||
1911 | } | ||||||
1912 | |||||||
1913 | // Recompute the blocks participating in this loop. This may be empty if it | ||||||
1914 | // is no longer a loop. | ||||||
1915 | auto LoopBlockSet = recomputeLoopBlockSet(L, LI); | ||||||
1916 | |||||||
1917 | // If we still have a loop, we need to re-set the loop's parent as the exit | ||||||
1918 | // block set changing may have moved it within the loop nest. Note that this | ||||||
1919 | // can only happen when this loop has a parent as it can only hoist the loop | ||||||
1920 | // *up* the nest. | ||||||
1921 | if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) { | ||||||
1922 | // Remove this loop's (original) blocks from all of the intervening loops. | ||||||
1923 | for (Loop *IL = L.getParentLoop(); IL != ParentL; | ||||||
1924 | IL = IL->getParentLoop()) { | ||||||
1925 | IL->getBlocksSet().erase(PH); | ||||||
1926 | for (auto *BB : L.blocks()) | ||||||
1927 | IL->getBlocksSet().erase(BB); | ||||||
1928 | llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) { | ||||||
1929 | return BB == PH || L.contains(BB); | ||||||
1930 | }); | ||||||
1931 | } | ||||||
1932 | |||||||
1933 | LI.changeLoopFor(PH, ParentL); | ||||||
1934 | L.getParentLoop()->removeChildLoop(&L); | ||||||
1935 | if (ParentL) | ||||||
1936 | ParentL->addChildLoop(&L); | ||||||
1937 | else | ||||||
1938 | LI.addTopLevelLoop(&L); | ||||||
1939 | } | ||||||
1940 | |||||||
1941 | // Now we update all the blocks which are no longer within the loop. | ||||||
1942 | auto &Blocks = L.getBlocksVector(); | ||||||
1943 | auto BlocksSplitI = | ||||||
1944 | LoopBlockSet.empty() | ||||||
1945 | ? Blocks.begin() | ||||||
1946 | : std::stable_partition( | ||||||
1947 | Blocks.begin(), Blocks.end(), | ||||||
1948 | [&](BasicBlock *BB) { return LoopBlockSet.count(BB); }); | ||||||
1949 | |||||||
1950 | // Before we erase the list of unlooped blocks, build a set of them. | ||||||
1951 | SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end()); | ||||||
1952 | if (LoopBlockSet.empty()) | ||||||
1953 | UnloopedBlocks.insert(PH); | ||||||
1954 | |||||||
1955 | // Now erase these blocks from the loop. | ||||||
1956 | for (auto *BB : make_range(BlocksSplitI, Blocks.end())) | ||||||
1957 | L.getBlocksSet().erase(BB); | ||||||
1958 | Blocks.erase(BlocksSplitI, Blocks.end()); | ||||||
1959 | |||||||
1960 | // Sort the exits in ascending loop depth, we'll work backwards across these | ||||||
1961 | // to process them inside out. | ||||||
1962 | llvm::stable_sort(ExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) { | ||||||
1963 | return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS); | ||||||
1964 | }); | ||||||
1965 | |||||||
1966 | // We'll build up a set for each exit loop. | ||||||
1967 | SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks; | ||||||
1968 | Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop. | ||||||
1969 | |||||||
1970 | auto RemoveUnloopedBlocksFromLoop = | ||||||
1971 | [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) { | ||||||
1972 | for (auto *BB : UnloopedBlocks) | ||||||
1973 | L.getBlocksSet().erase(BB); | ||||||
1974 | llvm::erase_if(L.getBlocksVector(), [&](BasicBlock *BB) { | ||||||
1975 | return UnloopedBlocks.count(BB); | ||||||
1976 | }); | ||||||
1977 | }; | ||||||
1978 | |||||||
1979 | SmallVector<BasicBlock *, 16> Worklist; | ||||||
1980 | while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) { | ||||||
1981 | assert(Worklist.empty() && "Didn't clear worklist!")(static_cast <bool> (Worklist.empty() && "Didn't clear worklist!" ) ? void (0) : __assert_fail ("Worklist.empty() && \"Didn't clear worklist!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1981, __extension__ __PRETTY_FUNCTION__)); | ||||||
1982 | assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!")(static_cast <bool> (NewExitLoopBlocks.empty() && "Didn't clear loop set!") ? void (0) : __assert_fail ("NewExitLoopBlocks.empty() && \"Didn't clear loop set!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1982, __extension__ __PRETTY_FUNCTION__)); | ||||||
1983 | |||||||
1984 | // Grab the next exit block, in decreasing loop depth order. | ||||||
1985 | BasicBlock *ExitBB = ExitsInLoops.pop_back_val(); | ||||||
1986 | Loop &ExitL = *LI.getLoopFor(ExitBB); | ||||||
1987 | assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!")(static_cast <bool> (ExitL.contains(&L) && "Exit loop must contain the inner loop!" ) ? void (0) : __assert_fail ("ExitL.contains(&L) && \"Exit loop must contain the inner loop!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1987, __extension__ __PRETTY_FUNCTION__)); | ||||||
1988 | |||||||
1989 | // Erase all of the unlooped blocks from the loops between the previous | ||||||
1990 | // exit loop and this exit loop. This works because the ExitInLoops list is | ||||||
1991 | // sorted in increasing order of loop depth and thus we visit loops in | ||||||
1992 | // decreasing order of loop depth. | ||||||
1993 | for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop()) | ||||||
1994 | RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks); | ||||||
1995 | |||||||
1996 | // Walk the CFG back until we hit the cloned PH adding everything reachable | ||||||
1997 | // and in the unlooped set to this exit block's loop. | ||||||
1998 | Worklist.push_back(ExitBB); | ||||||
1999 | do { | ||||||
2000 | BasicBlock *BB = Worklist.pop_back_val(); | ||||||
2001 | // We can stop recursing at the cloned preheader (if we get there). | ||||||
2002 | if (BB == PH) | ||||||
2003 | continue; | ||||||
2004 | |||||||
2005 | for (BasicBlock *PredBB : predecessors(BB)) { | ||||||
2006 | // If this pred has already been moved to our set or is part of some | ||||||
2007 | // (inner) loop, no update needed. | ||||||
2008 | if (!UnloopedBlocks.erase(PredBB)) { | ||||||
2009 | assert((NewExitLoopBlocks.count(PredBB) ||(static_cast <bool> ((NewExitLoopBlocks.count(PredBB) || ExitL.contains(LI.getLoopFor(PredBB))) && "Predecessor not in a nested loop (or already visited)!" ) ? void (0) : __assert_fail ("(NewExitLoopBlocks.count(PredBB) || ExitL.contains(LI.getLoopFor(PredBB))) && \"Predecessor not in a nested loop (or already visited)!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2011, __extension__ __PRETTY_FUNCTION__)) | ||||||
2010 | ExitL.contains(LI.getLoopFor(PredBB))) &&(static_cast <bool> ((NewExitLoopBlocks.count(PredBB) || ExitL.contains(LI.getLoopFor(PredBB))) && "Predecessor not in a nested loop (or already visited)!" ) ? void (0) : __assert_fail ("(NewExitLoopBlocks.count(PredBB) || ExitL.contains(LI.getLoopFor(PredBB))) && \"Predecessor not in a nested loop (or already visited)!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2011, __extension__ __PRETTY_FUNCTION__)) | ||||||
2011 | "Predecessor not in a nested loop (or already visited)!")(static_cast <bool> ((NewExitLoopBlocks.count(PredBB) || ExitL.contains(LI.getLoopFor(PredBB))) && "Predecessor not in a nested loop (or already visited)!" ) ? void (0) : __assert_fail ("(NewExitLoopBlocks.count(PredBB) || ExitL.contains(LI.getLoopFor(PredBB))) && \"Predecessor not in a nested loop (or already visited)!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2011, __extension__ __PRETTY_FUNCTION__)); | ||||||
2012 | continue; | ||||||
2013 | } | ||||||
2014 | |||||||
2015 | // We just insert into the loop set here. We'll add these blocks to the | ||||||
2016 | // exit loop after we build up the set in a deterministic order rather | ||||||
2017 | // than the predecessor-influenced visit order. | ||||||
2018 | bool Inserted = NewExitLoopBlocks.insert(PredBB).second; | ||||||
2019 | (void)Inserted; | ||||||
2020 | assert(Inserted && "Should only visit an unlooped block once!")(static_cast <bool> (Inserted && "Should only visit an unlooped block once!" ) ? void (0) : __assert_fail ("Inserted && \"Should only visit an unlooped block once!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2020, __extension__ __PRETTY_FUNCTION__)); | ||||||
2021 | |||||||
2022 | // And recurse through to its predecessors. | ||||||
2023 | Worklist.push_back(PredBB); | ||||||
2024 | } | ||||||
2025 | } while (!Worklist.empty()); | ||||||
2026 | |||||||
2027 | // If blocks in this exit loop were directly part of the original loop (as | ||||||
2028 | // opposed to a child loop) update the map to point to this exit loop. This | ||||||
2029 | // just updates a map and so the fact that the order is unstable is fine. | ||||||
2030 | for (auto *BB : NewExitLoopBlocks) | ||||||
2031 | if (Loop *BBL = LI.getLoopFor(BB)) | ||||||
2032 | if (BBL == &L || !L.contains(BBL)) | ||||||
2033 | LI.changeLoopFor(BB, &ExitL); | ||||||
2034 | |||||||
2035 | // We will remove the remaining unlooped blocks from this loop in the next | ||||||
2036 | // iteration or below. | ||||||
2037 | NewExitLoopBlocks.clear(); | ||||||
2038 | } | ||||||
2039 | |||||||
2040 | // Any remaining unlooped blocks are no longer part of any loop unless they | ||||||
2041 | // are part of some child loop. | ||||||
2042 | for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop()) | ||||||
2043 | RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks); | ||||||
2044 | for (auto *BB : UnloopedBlocks) | ||||||
2045 | if (Loop *BBL = LI.getLoopFor(BB)) | ||||||
2046 | if (BBL == &L || !L.contains(BBL)) | ||||||
2047 | LI.changeLoopFor(BB, nullptr); | ||||||
2048 | |||||||
2049 | // Sink all the child loops whose headers are no longer in the loop set to | ||||||
2050 | // the parent (or to be top level loops). We reach into the loop and directly | ||||||
2051 | // update its subloop vector to make this batch update efficient. | ||||||
2052 | auto &SubLoops = L.getSubLoopsVector(); | ||||||
2053 | auto SubLoopsSplitI = | ||||||
2054 | LoopBlockSet.empty() | ||||||
2055 | ? SubLoops.begin() | ||||||
2056 | : std::stable_partition( | ||||||
2057 | SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) { | ||||||
2058 | return LoopBlockSet.count(SubL->getHeader()); | ||||||
2059 | }); | ||||||
2060 | for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) { | ||||||
2061 | HoistedLoops.push_back(HoistedL); | ||||||
2062 | HoistedL->setParentLoop(nullptr); | ||||||
2063 | |||||||
2064 | // To compute the new parent of this hoisted loop we look at where we | ||||||
2065 | // placed the preheader above. We can't lookup the header itself because we | ||||||
2066 | // retained the mapping from the header to the hoisted loop. But the | ||||||
2067 | // preheader and header should have the exact same new parent computed | ||||||
2068 | // based on the set of exit blocks from the original loop as the preheader | ||||||
2069 | // is a predecessor of the header and so reached in the reverse walk. And | ||||||
2070 | // because the loops were all in simplified form the preheader of the | ||||||
2071 | // hoisted loop can't be part of some *other* loop. | ||||||
2072 | if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader())) | ||||||
2073 | NewParentL->addChildLoop(HoistedL); | ||||||
2074 | else | ||||||
2075 | LI.addTopLevelLoop(HoistedL); | ||||||
2076 | } | ||||||
2077 | SubLoops.erase(SubLoopsSplitI, SubLoops.end()); | ||||||
2078 | |||||||
2079 | // Actually delete the loop if nothing remained within it. | ||||||
2080 | if (Blocks.empty()) { | ||||||
2081 | assert(SubLoops.empty() &&(static_cast <bool> (SubLoops.empty() && "Failed to remove all subloops from the original loop!" ) ? void (0) : __assert_fail ("SubLoops.empty() && \"Failed to remove all subloops from the original loop!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2082, __extension__ __PRETTY_FUNCTION__)) | ||||||
2082 | "Failed to remove all subloops from the original loop!")(static_cast <bool> (SubLoops.empty() && "Failed to remove all subloops from the original loop!" ) ? void (0) : __assert_fail ("SubLoops.empty() && \"Failed to remove all subloops from the original loop!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2082, __extension__ __PRETTY_FUNCTION__)); | ||||||
2083 | if (Loop *ParentL = L.getParentLoop()) | ||||||
2084 | ParentL->removeChildLoop(llvm::find(*ParentL, &L)); | ||||||
2085 | else | ||||||
2086 | LI.removeLoop(llvm::find(LI, &L)); | ||||||
2087 | // markLoopAsDeleted for L should be triggered by the caller (it is typically | ||||||
2088 | // done by using the UnswitchCB callback). | ||||||
2089 | if (SE) | ||||||
2090 | SE->forgetBlockAndLoopDispositions(); | ||||||
2091 | LI.destroy(&L); | ||||||
2092 | return false; | ||||||
2093 | } | ||||||
2094 | |||||||
2095 | return true; | ||||||
2096 | } | ||||||
2097 | |||||||
2098 | /// Helper to visit a dominator subtree, invoking a callable on each node. | ||||||
2099 | /// | ||||||
2100 | /// Returning false at any point will stop walking past that node of the tree. | ||||||
2101 | template <typename CallableT> | ||||||
2102 | void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) { | ||||||
2103 | SmallVector<DomTreeNode *, 4> DomWorklist; | ||||||
2104 | DomWorklist.push_back(DT[BB]); | ||||||
2105 | #ifndef NDEBUG | ||||||
2106 | SmallPtrSet<DomTreeNode *, 4> Visited; | ||||||
2107 | Visited.insert(DT[BB]); | ||||||
2108 | #endif | ||||||
2109 | do { | ||||||
2110 | DomTreeNode *N = DomWorklist.pop_back_val(); | ||||||
2111 | |||||||
2112 | // Visit this node. | ||||||
2113 | if (!Callable(N->getBlock())) | ||||||
2114 | continue; | ||||||
2115 | |||||||
2116 | // Accumulate the child nodes. | ||||||
2117 | for (DomTreeNode *ChildN : *N) { | ||||||
2118 | assert(Visited.insert(ChildN).second &&(static_cast <bool> (Visited.insert(ChildN).second && "Cannot visit a node twice when walking a tree!") ? void (0) : __assert_fail ("Visited.insert(ChildN).second && \"Cannot visit a node twice when walking a tree!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2119, __extension__ __PRETTY_FUNCTION__)) | ||||||
2119 | "Cannot visit a node twice when walking a tree!")(static_cast <bool> (Visited.insert(ChildN).second && "Cannot visit a node twice when walking a tree!") ? void (0) : __assert_fail ("Visited.insert(ChildN).second && \"Cannot visit a node twice when walking a tree!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2119, __extension__ __PRETTY_FUNCTION__)); | ||||||
2120 | DomWorklist.push_back(ChildN); | ||||||
2121 | } | ||||||
2122 | } while (!DomWorklist.empty()); | ||||||
2123 | } | ||||||
2124 | |||||||
2125 | static void unswitchNontrivialInvariants( | ||||||
2126 | Loop &L, Instruction &TI, ArrayRef<Value *> Invariants, | ||||||
2127 | IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI, | ||||||
2128 | AssumptionCache &AC, | ||||||
2129 | function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB, | ||||||
2130 | ScalarEvolution *SE, MemorySSAUpdater *MSSAU, | ||||||
2131 | function_ref<void(Loop &, StringRef)> DestroyLoopCB, bool InsertFreeze) { | ||||||
2132 | auto *ParentBB = TI.getParent(); | ||||||
2133 | BranchInst *BI = dyn_cast<BranchInst>(&TI); | ||||||
2134 | SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI); | ||||||
2135 | |||||||
2136 | // We can only unswitch switches, conditional branches with an invariant | ||||||
2137 | // condition, or combining invariant conditions with an instruction or | ||||||
2138 | // partially invariant instructions. | ||||||
2139 | assert((SI || (BI && BI->isConditional())) &&(static_cast <bool> ((SI || (BI && BI->isConditional ())) && "Can only unswitch switches and conditional branch!" ) ? void (0) : __assert_fail ("(SI || (BI && BI->isConditional())) && \"Can only unswitch switches and conditional branch!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2140, __extension__ __PRETTY_FUNCTION__)) | ||||||
2140 | "Can only unswitch switches and conditional branch!")(static_cast <bool> ((SI || (BI && BI->isConditional ())) && "Can only unswitch switches and conditional branch!" ) ? void (0) : __assert_fail ("(SI || (BI && BI->isConditional())) && \"Can only unswitch switches and conditional branch!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2140, __extension__ __PRETTY_FUNCTION__)); | ||||||
2141 | bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty(); | ||||||
2142 | bool FullUnswitch = | ||||||
2143 | SI || (skipTrivialSelect(BI->getCondition()) == Invariants[0] && | ||||||
2144 | !PartiallyInvariant); | ||||||
2145 | if (FullUnswitch) | ||||||
2146 | assert(Invariants.size() == 1 &&(static_cast <bool> (Invariants.size() == 1 && "Cannot have other invariants with full unswitching!" ) ? void (0) : __assert_fail ("Invariants.size() == 1 && \"Cannot have other invariants with full unswitching!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2147, __extension__ __PRETTY_FUNCTION__)) | ||||||
2147 | "Cannot have other invariants with full unswitching!")(static_cast <bool> (Invariants.size() == 1 && "Cannot have other invariants with full unswitching!" ) ? void (0) : __assert_fail ("Invariants.size() == 1 && \"Cannot have other invariants with full unswitching!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2147, __extension__ __PRETTY_FUNCTION__)); | ||||||
2148 | else | ||||||
2149 | assert(isa<Instruction>(skipTrivialSelect(BI->getCondition())) &&(static_cast <bool> (isa<Instruction>(skipTrivialSelect (BI->getCondition())) && "Partial unswitching requires an instruction as the condition!" ) ? void (0) : __assert_fail ("isa<Instruction>(skipTrivialSelect(BI->getCondition())) && \"Partial unswitching requires an instruction as the condition!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2150, __extension__ __PRETTY_FUNCTION__)) | ||||||
2150 | "Partial unswitching requires an instruction as the condition!")(static_cast <bool> (isa<Instruction>(skipTrivialSelect (BI->getCondition())) && "Partial unswitching requires an instruction as the condition!" ) ? void (0) : __assert_fail ("isa<Instruction>(skipTrivialSelect(BI->getCondition())) && \"Partial unswitching requires an instruction as the condition!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2150, __extension__ __PRETTY_FUNCTION__)); | ||||||
2151 | |||||||
2152 | if (MSSAU && VerifyMemorySSA) | ||||||
2153 | MSSAU->getMemorySSA()->verifyMemorySSA(); | ||||||
2154 | |||||||
2155 | // Constant and BBs tracking the cloned and continuing successor. When we are | ||||||
2156 | // unswitching the entire condition, this can just be trivially chosen to | ||||||
2157 | // unswitch towards `true`. However, when we are unswitching a set of | ||||||
2158 | // invariants combined with `and` or `or` or partially invariant instructions, | ||||||
2159 | // the combining operation determines the best direction to unswitch: we want | ||||||
2160 | // to unswitch the direction that will collapse the branch. | ||||||
2161 | bool Direction = true; | ||||||
2162 | int ClonedSucc = 0; | ||||||
2163 | if (!FullUnswitch) { | ||||||
2164 | Value *Cond = skipTrivialSelect(BI->getCondition()); | ||||||
2165 | (void)Cond; | ||||||
2166 | assert(((match(Cond, m_LogicalAnd()) ^ match(Cond, m_LogicalOr())) ||(static_cast <bool> (((match(Cond, m_LogicalAnd()) ^ match (Cond, m_LogicalOr())) || PartiallyInvariant) && "Only `or`, `and`, an `select`, partially invariant instructions " "can combine invariants being unswitched.") ? void (0) : __assert_fail ("((match(Cond, m_LogicalAnd()) ^ match(Cond, m_LogicalOr())) || PartiallyInvariant) && \"Only `or`, `and`, an `select`, partially invariant instructions \" \"can combine invariants being unswitched.\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2169, __extension__ __PRETTY_FUNCTION__)) | ||||||
2167 | PartiallyInvariant) &&(static_cast <bool> (((match(Cond, m_LogicalAnd()) ^ match (Cond, m_LogicalOr())) || PartiallyInvariant) && "Only `or`, `and`, an `select`, partially invariant instructions " "can combine invariants being unswitched.") ? void (0) : __assert_fail ("((match(Cond, m_LogicalAnd()) ^ match(Cond, m_LogicalOr())) || PartiallyInvariant) && \"Only `or`, `and`, an `select`, partially invariant instructions \" \"can combine invariants being unswitched.\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2169, __extension__ __PRETTY_FUNCTION__)) | ||||||
2168 | "Only `or`, `and`, an `select`, partially invariant instructions "(static_cast <bool> (((match(Cond, m_LogicalAnd()) ^ match (Cond, m_LogicalOr())) || PartiallyInvariant) && "Only `or`, `and`, an `select`, partially invariant instructions " "can combine invariants being unswitched.") ? void (0) : __assert_fail ("((match(Cond, m_LogicalAnd()) ^ match(Cond, m_LogicalOr())) || PartiallyInvariant) && \"Only `or`, `and`, an `select`, partially invariant instructions \" \"can combine invariants being unswitched.\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2169, __extension__ __PRETTY_FUNCTION__)) | ||||||
2169 | "can combine invariants being unswitched.")(static_cast <bool> (((match(Cond, m_LogicalAnd()) ^ match (Cond, m_LogicalOr())) || PartiallyInvariant) && "Only `or`, `and`, an `select`, partially invariant instructions " "can combine invariants being unswitched.") ? void (0) : __assert_fail ("((match(Cond, m_LogicalAnd()) ^ match(Cond, m_LogicalOr())) || PartiallyInvariant) && \"Only `or`, `and`, an `select`, partially invariant instructions \" \"can combine invariants being unswitched.\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2169, __extension__ __PRETTY_FUNCTION__)); | ||||||
2170 | if (!match(Cond, m_LogicalOr())) { | ||||||
2171 | if (match(Cond, m_LogicalAnd()) || | ||||||
2172 | (PartiallyInvariant && !PartialIVInfo.KnownValue->isOneValue())) { | ||||||
2173 | Direction = false; | ||||||
2174 | ClonedSucc = 1; | ||||||
2175 | } | ||||||
2176 | } | ||||||
2177 | } | ||||||
2178 | |||||||
2179 | BasicBlock *RetainedSuccBB = | ||||||
2180 | BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest(); | ||||||
2181 | SmallSetVector<BasicBlock *, 4> UnswitchedSuccBBs; | ||||||
2182 | if (BI) | ||||||
2183 | UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc)); | ||||||
2184 | else | ||||||
2185 | for (auto Case : SI->cases()) | ||||||
2186 | if (Case.getCaseSuccessor() != RetainedSuccBB) | ||||||
2187 | UnswitchedSuccBBs.insert(Case.getCaseSuccessor()); | ||||||
2188 | |||||||
2189 | assert(!UnswitchedSuccBBs.count(RetainedSuccBB) &&(static_cast <bool> (!UnswitchedSuccBBs.count(RetainedSuccBB ) && "Should not unswitch the same successor we are retaining!" ) ? void (0) : __assert_fail ("!UnswitchedSuccBBs.count(RetainedSuccBB) && \"Should not unswitch the same successor we are retaining!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2190, __extension__ __PRETTY_FUNCTION__)) | ||||||
2190 | "Should not unswitch the same successor we are retaining!")(static_cast <bool> (!UnswitchedSuccBBs.count(RetainedSuccBB ) && "Should not unswitch the same successor we are retaining!" ) ? void (0) : __assert_fail ("!UnswitchedSuccBBs.count(RetainedSuccBB) && \"Should not unswitch the same successor we are retaining!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2190, __extension__ __PRETTY_FUNCTION__)); | ||||||
2191 | |||||||
2192 | // The branch should be in this exact loop. Any inner loop's invariant branch | ||||||
2193 | // should be handled by unswitching that inner loop. The caller of this | ||||||
2194 | // routine should filter out any candidates that remain (but were skipped for | ||||||
2195 | // whatever reason). | ||||||
2196 | assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!")(static_cast <bool> (LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!") ? void (0) : __assert_fail ("LI.getLoopFor(ParentBB) == &L && \"Branch in an inner loop!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2196, __extension__ __PRETTY_FUNCTION__)); | ||||||
2197 | |||||||
2198 | // Compute the parent loop now before we start hacking on things. | ||||||
2199 | Loop *ParentL = L.getParentLoop(); | ||||||
2200 | // Get blocks in RPO order for MSSA update, before changing the CFG. | ||||||
2201 | LoopBlocksRPO LBRPO(&L); | ||||||
2202 | if (MSSAU) | ||||||
2203 | LBRPO.perform(&LI); | ||||||
2204 | |||||||
2205 | // Compute the outer-most loop containing one of our exit blocks. This is the | ||||||
2206 | // furthest up our loopnest which can be mutated, which we will use below to | ||||||
2207 | // update things. | ||||||
2208 | Loop *OuterExitL = &L; | ||||||
2209 | SmallVector<BasicBlock *, 4> ExitBlocks; | ||||||
2210 | L.getUniqueExitBlocks(ExitBlocks); | ||||||
2211 | for (auto *ExitBB : ExitBlocks) { | ||||||
2212 | // ExitBB can be an exit block for several levels in the loop nest. Make | ||||||
2213 | // sure we find the top most. | ||||||
2214 | Loop *NewOuterExitL = getTopMostExitingLoop(ExitBB, LI); | ||||||
2215 | if (!NewOuterExitL) { | ||||||
2216 | // We exited the entire nest with this block, so we're done. | ||||||
2217 | OuterExitL = nullptr; | ||||||
2218 | break; | ||||||
2219 | } | ||||||
2220 | if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL)) | ||||||
2221 | OuterExitL = NewOuterExitL; | ||||||
2222 | } | ||||||
2223 | |||||||
2224 | // At this point, we're definitely going to unswitch something so invalidate | ||||||
2225 | // any cached information in ScalarEvolution for the outer most loop | ||||||
2226 | // containing an exit block and all nested loops. | ||||||
2227 | if (SE) { | ||||||
2228 | if (OuterExitL) | ||||||
2229 | SE->forgetLoop(OuterExitL); | ||||||
2230 | else | ||||||
2231 | SE->forgetTopmostLoop(&L); | ||||||
2232 | SE->forgetBlockAndLoopDispositions(); | ||||||
2233 | } | ||||||
2234 | |||||||
2235 | // If the edge from this terminator to a successor dominates that successor, | ||||||
2236 | // store a map from each block in its dominator subtree to it. This lets us | ||||||
2237 | // tell when cloning for a particular successor if a block is dominated by | ||||||
2238 | // some *other* successor with a single data structure. We use this to | ||||||
2239 | // significantly reduce cloning. | ||||||
2240 | SmallDenseMap<BasicBlock *, BasicBlock *, 16> DominatingSucc; | ||||||
2241 | for (auto *SuccBB : llvm::concat<BasicBlock *const>(ArrayRef(RetainedSuccBB), | ||||||
2242 | UnswitchedSuccBBs)) | ||||||
2243 | if (SuccBB->getUniquePredecessor() || | ||||||
2244 | llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) { | ||||||
2245 | return PredBB == ParentBB || DT.dominates(SuccBB, PredBB); | ||||||
2246 | })) | ||||||
2247 | visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) { | ||||||
2248 | DominatingSucc[BB] = SuccBB; | ||||||
2249 | return true; | ||||||
2250 | }); | ||||||
2251 | |||||||
2252 | // Split the preheader, so that we know that there is a safe place to insert | ||||||
2253 | // the conditional branch. We will change the preheader to have a conditional | ||||||
2254 | // branch on LoopCond. The original preheader will become the split point | ||||||
2255 | // between the unswitched versions, and we will have a new preheader for the | ||||||
2256 | // original loop. | ||||||
2257 | BasicBlock *SplitBB = L.getLoopPreheader(); | ||||||
2258 | BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU); | ||||||
2259 | |||||||
2260 | // Keep track of the dominator tree updates needed. | ||||||
2261 | SmallVector<DominatorTree::UpdateType, 4> DTUpdates; | ||||||
2262 | |||||||
2263 | // Clone the loop for each unswitched successor. | ||||||
2264 | SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps; | ||||||
2265 | VMaps.reserve(UnswitchedSuccBBs.size()); | ||||||
2266 | SmallDenseMap<BasicBlock *, BasicBlock *, 4> ClonedPHs; | ||||||
2267 | for (auto *SuccBB : UnswitchedSuccBBs) { | ||||||
2268 | VMaps.emplace_back(new ValueToValueMapTy()); | ||||||
2269 | ClonedPHs[SuccBB] = buildClonedLoopBlocks( | ||||||
2270 | L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB, | ||||||
2271 | DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU, SE); | ||||||
2272 | } | ||||||
2273 | |||||||
2274 | // Drop metadata if we may break its semantics by moving this instr into the | ||||||
2275 | // split block. | ||||||
2276 | if (TI.getMetadata(LLVMContext::MD_make_implicit)) { | ||||||
2277 | if (DropNonTrivialImplicitNullChecks) | ||||||
2278 | // Do not spend time trying to understand if we can keep it, just drop it | ||||||
2279 | // to save compile time. | ||||||
2280 | TI.setMetadata(LLVMContext::MD_make_implicit, nullptr); | ||||||
2281 | else { | ||||||
2282 | // It is only legal to preserve make.implicit metadata if we are | ||||||
2283 | // guaranteed no reach implicit null check after following this branch. | ||||||
2284 | ICFLoopSafetyInfo SafetyInfo; | ||||||
2285 | SafetyInfo.computeLoopSafetyInfo(&L); | ||||||
2286 | if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L)) | ||||||
2287 | TI.setMetadata(LLVMContext::MD_make_implicit, nullptr); | ||||||
2288 | } | ||||||
2289 | } | ||||||
2290 | |||||||
2291 | // The stitching of the branched code back together depends on whether we're | ||||||
2292 | // doing full unswitching or not with the exception that we always want to | ||||||
2293 | // nuke the initial terminator placed in the split block. | ||||||
2294 | SplitBB->getTerminator()->eraseFromParent(); | ||||||
2295 | if (FullUnswitch) { | ||||||
2296 | // Splice the terminator from the original loop and rewrite its | ||||||
2297 | // successors. | ||||||
2298 | SplitBB->splice(SplitBB->end(), ParentBB, TI.getIterator()); | ||||||
2299 | |||||||
2300 | // Keep a clone of the terminator for MSSA updates. | ||||||
2301 | Instruction *NewTI = TI.clone(); | ||||||
2302 | NewTI->insertInto(ParentBB, ParentBB->end()); | ||||||
2303 | |||||||
2304 | // First wire up the moved terminator to the preheaders. | ||||||
2305 | if (BI) { | ||||||
2306 | BasicBlock *ClonedPH = ClonedPHs.begin()->second; | ||||||
2307 | BI->setSuccessor(ClonedSucc, ClonedPH); | ||||||
2308 | BI->setSuccessor(1 - ClonedSucc, LoopPH); | ||||||
2309 | Value *Cond = skipTrivialSelect(BI->getCondition()); | ||||||
2310 | if (InsertFreeze) | ||||||
2311 | Cond = new FreezeInst( | ||||||
2312 | Cond, Cond->getName() + ".fr", BI); | ||||||
2313 | BI->setCondition(Cond); | ||||||
2314 | DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH}); | ||||||
2315 | } else { | ||||||
2316 | assert(SI && "Must either be a branch or switch!")(static_cast <bool> (SI && "Must either be a branch or switch!" ) ? void (0) : __assert_fail ("SI && \"Must either be a branch or switch!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2316, __extension__ __PRETTY_FUNCTION__)); | ||||||
2317 | |||||||
2318 | // Walk the cases and directly update their successors. | ||||||
2319 | assert(SI->getDefaultDest() == RetainedSuccBB &&(static_cast <bool> (SI->getDefaultDest() == RetainedSuccBB && "Not retaining default successor!") ? void (0) : __assert_fail ("SI->getDefaultDest() == RetainedSuccBB && \"Not retaining default successor!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2320, __extension__ __PRETTY_FUNCTION__)) | ||||||
2320 | "Not retaining default successor!")(static_cast <bool> (SI->getDefaultDest() == RetainedSuccBB && "Not retaining default successor!") ? void (0) : __assert_fail ("SI->getDefaultDest() == RetainedSuccBB && \"Not retaining default successor!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2320, __extension__ __PRETTY_FUNCTION__)); | ||||||
2321 | SI->setDefaultDest(LoopPH); | ||||||
2322 | for (const auto &Case : SI->cases()) | ||||||
2323 | if (Case.getCaseSuccessor() == RetainedSuccBB) | ||||||
2324 | Case.setSuccessor(LoopPH); | ||||||
2325 | else | ||||||
2326 | Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second); | ||||||
2327 | |||||||
2328 | if (InsertFreeze) | ||||||
2329 | SI->setCondition(new FreezeInst( | ||||||
2330 | SI->getCondition(), SI->getCondition()->getName() + ".fr", SI)); | ||||||
2331 | |||||||
2332 | // We need to use the set to populate domtree updates as even when there | ||||||
2333 | // are multiple cases pointing at the same successor we only want to | ||||||
2334 | // remove and insert one edge in the domtree. | ||||||
2335 | for (BasicBlock *SuccBB : UnswitchedSuccBBs) | ||||||
2336 | DTUpdates.push_back( | ||||||
2337 | {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second}); | ||||||
2338 | } | ||||||
2339 | |||||||
2340 | if (MSSAU) { | ||||||
2341 | DT.applyUpdates(DTUpdates); | ||||||
2342 | DTUpdates.clear(); | ||||||
2343 | |||||||
2344 | // Remove all but one edge to the retained block and all unswitched | ||||||
2345 | // blocks. This is to avoid having duplicate entries in the cloned Phis, | ||||||
2346 | // when we know we only keep a single edge for each case. | ||||||
2347 | MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB); | ||||||
2348 | for (BasicBlock *SuccBB : UnswitchedSuccBBs) | ||||||
2349 | MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB); | ||||||
2350 | |||||||
2351 | for (auto &VMap : VMaps) | ||||||
2352 | MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap, | ||||||
2353 | /*IgnoreIncomingWithNoClones=*/true); | ||||||
2354 | MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT); | ||||||
2355 | |||||||
2356 | // Remove all edges to unswitched blocks. | ||||||
2357 | for (BasicBlock *SuccBB : UnswitchedSuccBBs) | ||||||
2358 | MSSAU->removeEdge(ParentBB, SuccBB); | ||||||
2359 | } | ||||||
2360 | |||||||
2361 | // Now unhook the successor relationship as we'll be replacing | ||||||
2362 | // the terminator with a direct branch. This is much simpler for branches | ||||||
2363 | // than switches so we handle those first. | ||||||
2364 | if (BI) { | ||||||
2365 | // Remove the parent as a predecessor of the unswitched successor. | ||||||
2366 | assert(UnswitchedSuccBBs.size() == 1 &&(static_cast <bool> (UnswitchedSuccBBs.size() == 1 && "Only one possible unswitched block for a branch!") ? void ( 0) : __assert_fail ("UnswitchedSuccBBs.size() == 1 && \"Only one possible unswitched block for a branch!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2367, __extension__ __PRETTY_FUNCTION__)) | ||||||
2367 | "Only one possible unswitched block for a branch!")(static_cast <bool> (UnswitchedSuccBBs.size() == 1 && "Only one possible unswitched block for a branch!") ? void ( 0) : __assert_fail ("UnswitchedSuccBBs.size() == 1 && \"Only one possible unswitched block for a branch!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2367, __extension__ __PRETTY_FUNCTION__)); | ||||||
2368 | BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin(); | ||||||
2369 | UnswitchedSuccBB->removePredecessor(ParentBB, | ||||||
2370 | /*KeepOneInputPHIs*/ true); | ||||||
2371 | DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB}); | ||||||
2372 | } else { | ||||||
2373 | // Note that we actually want to remove the parent block as a predecessor | ||||||
2374 | // of *every* case successor. The case successor is either unswitched, | ||||||
2375 | // completely eliminating an edge from the parent to that successor, or it | ||||||
2376 | // is a duplicate edge to the retained successor as the retained successor | ||||||
2377 | // is always the default successor and as we'll replace this with a direct | ||||||
2378 | // branch we no longer need the duplicate entries in the PHI nodes. | ||||||
2379 | SwitchInst *NewSI = cast<SwitchInst>(NewTI); | ||||||
2380 | assert(NewSI->getDefaultDest() == RetainedSuccBB &&(static_cast <bool> (NewSI->getDefaultDest() == RetainedSuccBB && "Not retaining default successor!") ? void (0) : __assert_fail ("NewSI->getDefaultDest() == RetainedSuccBB && \"Not retaining default successor!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2381, __extension__ __PRETTY_FUNCTION__)) | ||||||
2381 | "Not retaining default successor!")(static_cast <bool> (NewSI->getDefaultDest() == RetainedSuccBB && "Not retaining default successor!") ? void (0) : __assert_fail ("NewSI->getDefaultDest() == RetainedSuccBB && \"Not retaining default successor!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2381, __extension__ __PRETTY_FUNCTION__)); | ||||||
2382 | for (const auto &Case : NewSI->cases()) | ||||||
2383 | Case.getCaseSuccessor()->removePredecessor( | ||||||
2384 | ParentBB, | ||||||
2385 | /*KeepOneInputPHIs*/ true); | ||||||
2386 | |||||||
2387 | // We need to use the set to populate domtree updates as even when there | ||||||
2388 | // are multiple cases pointing at the same successor we only want to | ||||||
2389 | // remove and insert one edge in the domtree. | ||||||
2390 | for (BasicBlock *SuccBB : UnswitchedSuccBBs) | ||||||
2391 | DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB}); | ||||||
2392 | } | ||||||
2393 | |||||||
2394 | // After MSSAU update, remove the cloned terminator instruction NewTI. | ||||||
2395 | ParentBB->getTerminator()->eraseFromParent(); | ||||||
2396 | |||||||
2397 | // Create a new unconditional branch to the continuing block (as opposed to | ||||||
2398 | // the one cloned). | ||||||
2399 | BranchInst::Create(RetainedSuccBB, ParentBB); | ||||||
2400 | } else { | ||||||
2401 | assert(BI && "Only branches have partial unswitching.")(static_cast <bool> (BI && "Only branches have partial unswitching." ) ? void (0) : __assert_fail ("BI && \"Only branches have partial unswitching.\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2401, __extension__ __PRETTY_FUNCTION__)); | ||||||
2402 | assert(UnswitchedSuccBBs.size() == 1 &&(static_cast <bool> (UnswitchedSuccBBs.size() == 1 && "Only one possible unswitched block for a branch!") ? void ( 0) : __assert_fail ("UnswitchedSuccBBs.size() == 1 && \"Only one possible unswitched block for a branch!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2403, __extension__ __PRETTY_FUNCTION__)) | ||||||
2403 | "Only one possible unswitched block for a branch!")(static_cast <bool> (UnswitchedSuccBBs.size() == 1 && "Only one possible unswitched block for a branch!") ? void ( 0) : __assert_fail ("UnswitchedSuccBBs.size() == 1 && \"Only one possible unswitched block for a branch!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2403, __extension__ __PRETTY_FUNCTION__)); | ||||||
2404 | BasicBlock *ClonedPH = ClonedPHs.begin()->second; | ||||||
2405 | // When doing a partial unswitch, we have to do a bit more work to build up | ||||||
2406 | // the branch in the split block. | ||||||
2407 | if (PartiallyInvariant) | ||||||
2408 | buildPartialInvariantUnswitchConditionalBranch( | ||||||
2409 | *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU); | ||||||
2410 | else { | ||||||
2411 | buildPartialUnswitchConditionalBranch( | ||||||
2412 | *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, | ||||||
2413 | FreezeLoopUnswitchCond, BI, &AC, DT); | ||||||
2414 | } | ||||||
2415 | DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH}); | ||||||
2416 | |||||||
2417 | if (MSSAU) { | ||||||
2418 | DT.applyUpdates(DTUpdates); | ||||||
2419 | DTUpdates.clear(); | ||||||
2420 | |||||||
2421 | // Perform MSSA cloning updates. | ||||||
2422 | for (auto &VMap : VMaps) | ||||||
2423 | MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap, | ||||||
2424 | /*IgnoreIncomingWithNoClones=*/true); | ||||||
2425 | MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT); | ||||||
2426 | } | ||||||
2427 | } | ||||||
2428 | |||||||
2429 | // Apply the updates accumulated above to get an up-to-date dominator tree. | ||||||
2430 | DT.applyUpdates(DTUpdates); | ||||||
2431 | |||||||
2432 | // Now that we have an accurate dominator tree, first delete the dead cloned | ||||||
2433 | // blocks so that we can accurately build any cloned loops. It is important to | ||||||
2434 | // not delete the blocks from the original loop yet because we still want to | ||||||
2435 | // reference the original loop to understand the cloned loop's structure. | ||||||
2436 | deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU); | ||||||
2437 | |||||||
2438 | // Build the cloned loop structure itself. This may be substantially | ||||||
2439 | // different from the original structure due to the simplified CFG. This also | ||||||
2440 | // handles inserting all the cloned blocks into the correct loops. | ||||||
2441 | SmallVector<Loop *, 4> NonChildClonedLoops; | ||||||
2442 | for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps) | ||||||
2443 | buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops); | ||||||
2444 | |||||||
2445 | // Now that our cloned loops have been built, we can update the original loop. | ||||||
2446 | // First we delete the dead blocks from it and then we rebuild the loop | ||||||
2447 | // structure taking these deletions into account. | ||||||
2448 | deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, SE,DestroyLoopCB); | ||||||
2449 | |||||||
2450 | if (MSSAU && VerifyMemorySSA) | ||||||
2451 | MSSAU->getMemorySSA()->verifyMemorySSA(); | ||||||
2452 | |||||||
2453 | SmallVector<Loop *, 4> HoistedLoops; | ||||||
2454 | bool IsStillLoop = | ||||||
2455 | rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops, SE); | ||||||
2456 | |||||||
2457 | if (MSSAU && VerifyMemorySSA) | ||||||
2458 | MSSAU->getMemorySSA()->verifyMemorySSA(); | ||||||
2459 | |||||||
2460 | // This transformation has a high risk of corrupting the dominator tree, and | ||||||
2461 | // the below steps to rebuild loop structures will result in hard to debug | ||||||
2462 | // errors in that case so verify that the dominator tree is sane first. | ||||||
2463 | // FIXME: Remove this when the bugs stop showing up and rely on existing | ||||||
2464 | // verification steps. | ||||||
2465 | assert(DT.verify(DominatorTree::VerificationLevel::Fast))(static_cast <bool> (DT.verify(DominatorTree::VerificationLevel ::Fast)) ? void (0) : __assert_fail ("DT.verify(DominatorTree::VerificationLevel::Fast)" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2465, __extension__ __PRETTY_FUNCTION__)); | ||||||
2466 | |||||||
2467 | if (BI && !PartiallyInvariant) { | ||||||
2468 | // If we unswitched a branch which collapses the condition to a known | ||||||
2469 | // constant we want to replace all the uses of the invariants within both | ||||||
2470 | // the original and cloned blocks. We do this here so that we can use the | ||||||
2471 | // now updated dominator tree to identify which side the users are on. | ||||||
2472 | assert(UnswitchedSuccBBs.size() == 1 &&(static_cast <bool> (UnswitchedSuccBBs.size() == 1 && "Only one possible unswitched block for a branch!") ? void ( 0) : __assert_fail ("UnswitchedSuccBBs.size() == 1 && \"Only one possible unswitched block for a branch!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2473, __extension__ __PRETTY_FUNCTION__)) | ||||||
2473 | "Only one possible unswitched block for a branch!")(static_cast <bool> (UnswitchedSuccBBs.size() == 1 && "Only one possible unswitched block for a branch!") ? void ( 0) : __assert_fail ("UnswitchedSuccBBs.size() == 1 && \"Only one possible unswitched block for a branch!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2473, __extension__ __PRETTY_FUNCTION__)); | ||||||
2474 | BasicBlock *ClonedPH = ClonedPHs.begin()->second; | ||||||
2475 | |||||||
2476 | // When considering multiple partially-unswitched invariants | ||||||
2477 | // we can't just go replace them with constants in both branches. | ||||||
2478 | // | ||||||
2479 | // For 'AND' we infer that true branch ("continue") means true | ||||||
2480 | // for each invariant operand. | ||||||
2481 | // For 'OR' we can infer that false branch ("continue") means false | ||||||
2482 | // for each invariant operand. | ||||||
2483 | // So it happens that for multiple-partial case we don't replace | ||||||
2484 | // in the unswitched branch. | ||||||
2485 | bool ReplaceUnswitched = | ||||||
2486 | FullUnswitch || (Invariants.size() == 1) || PartiallyInvariant; | ||||||
2487 | |||||||
2488 | ConstantInt *UnswitchedReplacement = | ||||||
2489 | Direction ? ConstantInt::getTrue(BI->getContext()) | ||||||
2490 | : ConstantInt::getFalse(BI->getContext()); | ||||||
2491 | ConstantInt *ContinueReplacement = | ||||||
2492 | Direction ? ConstantInt::getFalse(BI->getContext()) | ||||||
2493 | : ConstantInt::getTrue(BI->getContext()); | ||||||
2494 | for (Value *Invariant : Invariants) { | ||||||
2495 | assert(!isa<Constant>(Invariant) &&(static_cast <bool> (!isa<Constant>(Invariant) && "Should not be replacing constant values!") ? void (0) : __assert_fail ("!isa<Constant>(Invariant) && \"Should not be replacing constant values!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2496, __extension__ __PRETTY_FUNCTION__)) | ||||||
2496 | "Should not be replacing constant values!")(static_cast <bool> (!isa<Constant>(Invariant) && "Should not be replacing constant values!") ? void (0) : __assert_fail ("!isa<Constant>(Invariant) && \"Should not be replacing constant values!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2496, __extension__ __PRETTY_FUNCTION__)); | ||||||
2497 | // Use make_early_inc_range here as set invalidates the iterator. | ||||||
2498 | for (Use &U : llvm::make_early_inc_range(Invariant->uses())) { | ||||||
2499 | Instruction *UserI = dyn_cast<Instruction>(U.getUser()); | ||||||
2500 | if (!UserI) | ||||||
2501 | continue; | ||||||
2502 | |||||||
2503 | // Replace it with the 'continue' side if in the main loop body, and the | ||||||
2504 | // unswitched if in the cloned blocks. | ||||||
2505 | if (DT.dominates(LoopPH, UserI->getParent())) | ||||||
2506 | U.set(ContinueReplacement); | ||||||
2507 | else if (ReplaceUnswitched && | ||||||
2508 | DT.dominates(ClonedPH, UserI->getParent())) | ||||||
2509 | U.set(UnswitchedReplacement); | ||||||
2510 | } | ||||||
2511 | } | ||||||
2512 | } | ||||||
2513 | |||||||
2514 | // We can change which blocks are exit blocks of all the cloned sibling | ||||||
2515 | // loops, the current loop, and any parent loops which shared exit blocks | ||||||
2516 | // with the current loop. As a consequence, we need to re-form LCSSA for | ||||||
2517 | // them. But we shouldn't need to re-form LCSSA for any child loops. | ||||||
2518 | // FIXME: This could be made more efficient by tracking which exit blocks are | ||||||
2519 | // new, and focusing on them, but that isn't likely to be necessary. | ||||||
2520 | // | ||||||
2521 | // In order to reasonably rebuild LCSSA we need to walk inside-out across the | ||||||
2522 | // loop nest and update every loop that could have had its exits changed. We | ||||||
2523 | // also need to cover any intervening loops. We add all of these loops to | ||||||
2524 | // a list and sort them by loop depth to achieve this without updating | ||||||
2525 | // unnecessary loops. | ||||||
2526 | auto UpdateLoop = [&](Loop &UpdateL) { | ||||||
2527 | #ifndef NDEBUG | ||||||
2528 | UpdateL.verifyLoop(); | ||||||
2529 | for (Loop *ChildL : UpdateL) { | ||||||
2530 | ChildL->verifyLoop(); | ||||||
2531 | assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&(static_cast <bool> (ChildL->isRecursivelyLCSSAForm( DT, LI) && "Perturbed a child loop's LCSSA form!") ? void (0) : __assert_fail ("ChildL->isRecursivelyLCSSAForm(DT, LI) && \"Perturbed a child loop's LCSSA form!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2532, __extension__ __PRETTY_FUNCTION__)) | ||||||
2532 | "Perturbed a child loop's LCSSA form!")(static_cast <bool> (ChildL->isRecursivelyLCSSAForm( DT, LI) && "Perturbed a child loop's LCSSA form!") ? void (0) : __assert_fail ("ChildL->isRecursivelyLCSSAForm(DT, LI) && \"Perturbed a child loop's LCSSA form!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2532, __extension__ __PRETTY_FUNCTION__)); | ||||||
2533 | } | ||||||
2534 | #endif | ||||||
2535 | // First build LCSSA for this loop so that we can preserve it when | ||||||
2536 | // forming dedicated exits. We don't want to perturb some other loop's | ||||||
2537 | // LCSSA while doing that CFG edit. | ||||||
2538 | formLCSSA(UpdateL, DT, &LI); | ||||||
2539 | |||||||
2540 | // For loops reached by this loop's original exit blocks we may have | ||||||
2541 | // introduced new, non-dedicated exits. At least try to re-form dedicated | ||||||
2542 | // exits for these loops. This may fail if they couldn't have dedicated | ||||||
2543 | // exits to start with. | ||||||
2544 | formDedicatedExitBlocks(&UpdateL, &DT, &LI, MSSAU, /*PreserveLCSSA*/ true); | ||||||
2545 | }; | ||||||
2546 | |||||||
2547 | // For non-child cloned loops and hoisted loops, we just need to update LCSSA | ||||||
2548 | // and we can do it in any order as they don't nest relative to each other. | ||||||
2549 | // | ||||||
2550 | // Also check if any of the loops we have updated have become top-level loops | ||||||
2551 | // as that will necessitate widening the outer loop scope. | ||||||
2552 | for (Loop *UpdatedL : | ||||||
2553 | llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) { | ||||||
2554 | UpdateLoop(*UpdatedL); | ||||||
2555 | if (UpdatedL->isOutermost()) | ||||||
2556 | OuterExitL = nullptr; | ||||||
2557 | } | ||||||
2558 | if (IsStillLoop) { | ||||||
2559 | UpdateLoop(L); | ||||||
2560 | if (L.isOutermost()) | ||||||
2561 | OuterExitL = nullptr; | ||||||
2562 | } | ||||||
2563 | |||||||
2564 | // If the original loop had exit blocks, walk up through the outer most loop | ||||||
2565 | // of those exit blocks to update LCSSA and form updated dedicated exits. | ||||||
2566 | if (OuterExitL != &L) | ||||||
2567 | for (Loop *OuterL = ParentL; OuterL != OuterExitL; | ||||||
2568 | OuterL = OuterL->getParentLoop()) | ||||||
2569 | UpdateLoop(*OuterL); | ||||||
2570 | |||||||
2571 | #ifndef NDEBUG | ||||||
2572 | // Verify the entire loop structure to catch any incorrect updates before we | ||||||
2573 | // progress in the pass pipeline. | ||||||
2574 | LI.verify(DT); | ||||||
2575 | #endif | ||||||
2576 | |||||||
2577 | // Now that we've unswitched something, make callbacks to report the changes. | ||||||
2578 | // For that we need to merge together the updated loops and the cloned loops | ||||||
2579 | // and check whether the original loop survived. | ||||||
2580 | SmallVector<Loop *, 4> SibLoops; | ||||||
2581 | for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) | ||||||
2582 | if (UpdatedL->getParentLoop() == ParentL) | ||||||
2583 | SibLoops.push_back(UpdatedL); | ||||||
2584 | UnswitchCB(IsStillLoop, PartiallyInvariant, SibLoops); | ||||||
2585 | |||||||
2586 | if (MSSAU && VerifyMemorySSA) | ||||||
2587 | MSSAU->getMemorySSA()->verifyMemorySSA(); | ||||||
2588 | |||||||
2589 | if (BI) | ||||||
2590 | ++NumBranches; | ||||||
2591 | else | ||||||
2592 | ++NumSwitches; | ||||||
2593 | } | ||||||
2594 | |||||||
2595 | /// Recursively compute the cost of a dominator subtree based on the per-block | ||||||
2596 | /// cost map provided. | ||||||
2597 | /// | ||||||
2598 | /// The recursive computation is memozied into the provided DT-indexed cost map | ||||||
2599 | /// to allow querying it for most nodes in the domtree without it becoming | ||||||
2600 | /// quadratic. | ||||||
2601 | static InstructionCost computeDomSubtreeCost( | ||||||
2602 | DomTreeNode &N, | ||||||
2603 | const SmallDenseMap<BasicBlock *, InstructionCost, 4> &BBCostMap, | ||||||
2604 | SmallDenseMap<DomTreeNode *, InstructionCost, 4> &DTCostMap) { | ||||||
2605 | // Don't accumulate cost (or recurse through) blocks not in our block cost | ||||||
2606 | // map and thus not part of the duplication cost being considered. | ||||||
2607 | auto BBCostIt = BBCostMap.find(N.getBlock()); | ||||||
2608 | if (BBCostIt == BBCostMap.end()) | ||||||
2609 | return 0; | ||||||
2610 | |||||||
2611 | // Lookup this node to see if we already computed its cost. | ||||||
2612 | auto DTCostIt = DTCostMap.find(&N); | ||||||
2613 | if (DTCostIt != DTCostMap.end()) | ||||||
2614 | return DTCostIt->second; | ||||||
2615 | |||||||
2616 | // If not, we have to compute it. We can't use insert above and update | ||||||
2617 | // because computing the cost may insert more things into the map. | ||||||
2618 | InstructionCost Cost = std::accumulate( | ||||||
2619 | N.begin(), N.end(), BBCostIt->second, | ||||||
2620 | [&](InstructionCost Sum, DomTreeNode *ChildN) -> InstructionCost { | ||||||
2621 | return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap); | ||||||
2622 | }); | ||||||
2623 | bool Inserted = DTCostMap.insert({&N, Cost}).second; | ||||||
2624 | (void)Inserted; | ||||||
2625 | assert(Inserted && "Should not insert a node while visiting children!")(static_cast <bool> (Inserted && "Should not insert a node while visiting children!" ) ? void (0) : __assert_fail ("Inserted && \"Should not insert a node while visiting children!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2625, __extension__ __PRETTY_FUNCTION__)); | ||||||
2626 | return Cost; | ||||||
2627 | } | ||||||
2628 | |||||||
2629 | /// Turns a select instruction into implicit control flow branch, | ||||||
2630 | /// making the following replacement: | ||||||
2631 | /// | ||||||
2632 | /// head: | ||||||
2633 | /// --code before select-- | ||||||
2634 | /// select %cond, %trueval, %falseval | ||||||
2635 | /// --code after select-- | ||||||
2636 | /// | ||||||
2637 | /// into | ||||||
2638 | /// | ||||||
2639 | /// head: | ||||||
2640 | /// --code before select-- | ||||||
2641 | /// br i1 %cond, label %then, label %tail | ||||||
2642 | /// | ||||||
2643 | /// then: | ||||||
2644 | /// br %tail | ||||||
2645 | /// | ||||||
2646 | /// tail: | ||||||
2647 | /// phi [ %trueval, %then ], [ %falseval, %head] | ||||||
2648 | /// unreachable | ||||||
2649 | /// | ||||||
2650 | /// It also makes all relevant DT and LI updates, so that all structures are in | ||||||
2651 | /// valid state after this transform. | ||||||
2652 | static BranchInst *turnSelectIntoBranch(SelectInst *SI, DominatorTree &DT, | ||||||
2653 | LoopInfo &LI, MemorySSAUpdater *MSSAU, | ||||||
2654 | AssumptionCache *AC) { | ||||||
2655 | LLVM_DEBUG(dbgs() << "Turning " << *SI << " into a branch.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << "Turning " << *SI << " into a branch.\n"; } } while (false); | ||||||
2656 | BasicBlock *HeadBB = SI->getParent(); | ||||||
2657 | |||||||
2658 | DomTreeUpdater DTU = | ||||||
2659 | DomTreeUpdater(DT, DomTreeUpdater::UpdateStrategy::Eager); | ||||||
2660 | SplitBlockAndInsertIfThen(SI->getCondition(), SI, false, | ||||||
2661 | SI->getMetadata(LLVMContext::MD_prof), &DTU, &LI); | ||||||
2662 | auto *CondBr = cast<BranchInst>(HeadBB->getTerminator()); | ||||||
2663 | BasicBlock *ThenBB = CondBr->getSuccessor(0), | ||||||
2664 | *TailBB = CondBr->getSuccessor(1); | ||||||
2665 | if (MSSAU) | ||||||
2666 | MSSAU->moveAllAfterSpliceBlocks(HeadBB, TailBB, SI); | ||||||
2667 | |||||||
2668 | PHINode *Phi = PHINode::Create(SI->getType(), 2, "unswitched.select", SI); | ||||||
2669 | Phi->addIncoming(SI->getTrueValue(), ThenBB); | ||||||
2670 | Phi->addIncoming(SI->getFalseValue(), HeadBB); | ||||||
2671 | SI->replaceAllUsesWith(Phi); | ||||||
2672 | SI->eraseFromParent(); | ||||||
2673 | |||||||
2674 | if (MSSAU && VerifyMemorySSA) | ||||||
2675 | MSSAU->getMemorySSA()->verifyMemorySSA(); | ||||||
2676 | |||||||
2677 | ++NumSelects; | ||||||
2678 | return CondBr; | ||||||
2679 | } | ||||||
2680 | |||||||
2681 | /// Turns a llvm.experimental.guard intrinsic into implicit control flow branch, | ||||||
2682 | /// making the following replacement: | ||||||
2683 | /// | ||||||
2684 | /// --code before guard-- | ||||||
2685 | /// call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ] | ||||||
2686 | /// --code after guard-- | ||||||
2687 | /// | ||||||
2688 | /// into | ||||||
2689 | /// | ||||||
2690 | /// --code before guard-- | ||||||
2691 | /// br i1 %cond, label %guarded, label %deopt | ||||||
2692 | /// | ||||||
2693 | /// guarded: | ||||||
2694 | /// --code after guard-- | ||||||
2695 | /// | ||||||
2696 | /// deopt: | ||||||
2697 | /// call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ] | ||||||
2698 | /// unreachable | ||||||
2699 | /// | ||||||
2700 | /// It also makes all relevant DT and LI updates, so that all structures are in | ||||||
2701 | /// valid state after this transform. | ||||||
2702 | static BranchInst *turnGuardIntoBranch(IntrinsicInst *GI, Loop &L, | ||||||
2703 | DominatorTree &DT, LoopInfo &LI, | ||||||
2704 | MemorySSAUpdater *MSSAU) { | ||||||
2705 | SmallVector<DominatorTree::UpdateType, 4> DTUpdates; | ||||||
2706 | LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << "Turning " << *GI << " into a branch.\n"; } } while (false); | ||||||
2707 | BasicBlock *CheckBB = GI->getParent(); | ||||||
2708 | |||||||
2709 | if (MSSAU && VerifyMemorySSA) | ||||||
2710 | MSSAU->getMemorySSA()->verifyMemorySSA(); | ||||||
2711 | |||||||
2712 | // Remove all CheckBB's successors from DomTree. A block can be seen among | ||||||
2713 | // successors more than once, but for DomTree it should be added only once. | ||||||
2714 | SmallPtrSet<BasicBlock *, 4> Successors; | ||||||
2715 | for (auto *Succ : successors(CheckBB)) | ||||||
2716 | if (Successors.insert(Succ).second) | ||||||
2717 | DTUpdates.push_back({DominatorTree::Delete, CheckBB, Succ}); | ||||||
2718 | |||||||
2719 | Instruction *DeoptBlockTerm = | ||||||
2720 | SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true); | ||||||
2721 | BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator()); | ||||||
2722 | // SplitBlockAndInsertIfThen inserts control flow that branches to | ||||||
2723 | // DeoptBlockTerm if the condition is true. We want the opposite. | ||||||
2724 | CheckBI->swapSuccessors(); | ||||||
2725 | |||||||
2726 | BasicBlock *GuardedBlock = CheckBI->getSuccessor(0); | ||||||
2727 | GuardedBlock->setName("guarded"); | ||||||
2728 | CheckBI->getSuccessor(1)->setName("deopt"); | ||||||
2729 | BasicBlock *DeoptBlock = CheckBI->getSuccessor(1); | ||||||
2730 | |||||||
2731 | if (MSSAU) | ||||||
2732 | MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI); | ||||||
2733 | |||||||
2734 | GI->moveBefore(DeoptBlockTerm); | ||||||
2735 | GI->setArgOperand(0, ConstantInt::getFalse(GI->getContext())); | ||||||
2736 | |||||||
2737 | // Add new successors of CheckBB into DomTree. | ||||||
2738 | for (auto *Succ : successors(CheckBB)) | ||||||
2739 | DTUpdates.push_back({DominatorTree::Insert, CheckBB, Succ}); | ||||||
2740 | |||||||
2741 | // Now the blocks that used to be CheckBB's successors are GuardedBlock's | ||||||
2742 | // successors. | ||||||
2743 | for (auto *Succ : Successors) | ||||||
2744 | DTUpdates.push_back({DominatorTree::Insert, GuardedBlock, Succ}); | ||||||
2745 | |||||||
2746 | // Make proper changes to DT. | ||||||
2747 | DT.applyUpdates(DTUpdates); | ||||||
2748 | // Inform LI of a new loop block. | ||||||
2749 | L.addBasicBlockToLoop(GuardedBlock, LI); | ||||||
2750 | |||||||
2751 | if (MSSAU) { | ||||||
2752 | MemoryDef *MD = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(GI)); | ||||||
2753 | MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::BeforeTerminator); | ||||||
2754 | if (VerifyMemorySSA) | ||||||
2755 | MSSAU->getMemorySSA()->verifyMemorySSA(); | ||||||
2756 | } | ||||||
2757 | |||||||
2758 | ++NumGuards; | ||||||
2759 | return CheckBI; | ||||||
2760 | } | ||||||
2761 | |||||||
2762 | /// Cost multiplier is a way to limit potentially exponential behavior | ||||||
2763 | /// of loop-unswitch. Cost is multipied in proportion of 2^number of unswitch | ||||||
2764 | /// candidates available. Also accounting for the number of "sibling" loops with | ||||||
2765 | /// the idea to account for previous unswitches that already happened on this | ||||||
2766 | /// cluster of loops. There was an attempt to keep this formula simple, | ||||||
2767 | /// just enough to limit the worst case behavior. Even if it is not that simple | ||||||
2768 | /// now it is still not an attempt to provide a detailed heuristic size | ||||||
2769 | /// prediction. | ||||||
2770 | /// | ||||||
2771 | /// TODO: Make a proper accounting of "explosion" effect for all kinds of | ||||||
2772 | /// unswitch candidates, making adequate predictions instead of wild guesses. | ||||||
2773 | /// That requires knowing not just the number of "remaining" candidates but | ||||||
2774 | /// also costs of unswitching for each of these candidates. | ||||||
2775 | static int CalculateUnswitchCostMultiplier( | ||||||
2776 | const Instruction &TI, const Loop &L, const LoopInfo &LI, | ||||||
2777 | const DominatorTree &DT, | ||||||
2778 | ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates) { | ||||||
2779 | |||||||
2780 | // Guards and other exiting conditions do not contribute to exponential | ||||||
2781 | // explosion as soon as they dominate the latch (otherwise there might be | ||||||
2782 | // another path to the latch remaining that does not allow to eliminate the | ||||||
2783 | // loop copy on unswitch). | ||||||
2784 | const BasicBlock *Latch = L.getLoopLatch(); | ||||||
2785 | const BasicBlock *CondBlock = TI.getParent(); | ||||||
2786 | if (DT.dominates(CondBlock, Latch) && | ||||||
2787 | (isGuard(&TI) || | ||||||
2788 | (TI.isTerminator() && | ||||||
2789 | llvm::count_if(successors(&TI), [&L](const BasicBlock *SuccBB) { | ||||||
2790 | return L.contains(SuccBB); | ||||||
2791 | }) <= 1))) { | ||||||
2792 | NumCostMultiplierSkipped++; | ||||||
2793 | return 1; | ||||||
2794 | } | ||||||
2795 | |||||||
2796 | auto *ParentL = L.getParentLoop(); | ||||||
2797 | int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size() | ||||||
2798 | : std::distance(LI.begin(), LI.end())); | ||||||
2799 | // Count amount of clones that all the candidates might cause during | ||||||
2800 | // unswitching. Branch/guard/select counts as 1, switch counts as log2 of its | ||||||
2801 | // cases. | ||||||
2802 | int UnswitchedClones = 0; | ||||||
2803 | for (const auto &Candidate : UnswitchCandidates) { | ||||||
2804 | const Instruction *CI = Candidate.TI; | ||||||
2805 | const BasicBlock *CondBlock = CI->getParent(); | ||||||
2806 | bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch); | ||||||
2807 | if (isa<SelectInst>(CI)) { | ||||||
2808 | UnswitchedClones++; | ||||||
2809 | continue; | ||||||
2810 | } | ||||||
2811 | if (isGuard(CI)) { | ||||||
2812 | if (!SkipExitingSuccessors) | ||||||
2813 | UnswitchedClones++; | ||||||
2814 | continue; | ||||||
2815 | } | ||||||
2816 | int NonExitingSuccessors = | ||||||
2817 | llvm::count_if(successors(CondBlock), | ||||||
2818 | [SkipExitingSuccessors, &L](const BasicBlock *SuccBB) { | ||||||
2819 | return !SkipExitingSuccessors || L.contains(SuccBB); | ||||||
2820 | }); | ||||||
2821 | UnswitchedClones += Log2_32(NonExitingSuccessors); | ||||||
2822 | } | ||||||
2823 | |||||||
2824 | // Ignore up to the "unscaled candidates" number of unswitch candidates | ||||||
2825 | // when calculating the power-of-two scaling of the cost. The main idea | ||||||
2826 | // with this control is to allow a small number of unswitches to happen | ||||||
2827 | // and rely more on siblings multiplier (see below) when the number | ||||||
2828 | // of candidates is small. | ||||||
2829 | unsigned ClonesPower = | ||||||
2830 | std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0); | ||||||
2831 | |||||||
2832 | // Allowing top-level loops to spread a bit more than nested ones. | ||||||
2833 | int SiblingsMultiplier = | ||||||
2834 | std::max((ParentL ? SiblingsCount | ||||||
2835 | : SiblingsCount / (int)UnswitchSiblingsToplevelDiv), | ||||||
2836 | 1); | ||||||
2837 | // Compute the cost multiplier in a way that won't overflow by saturating | ||||||
2838 | // at an upper bound. | ||||||
2839 | int CostMultiplier; | ||||||
2840 | if (ClonesPower > Log2_32(UnswitchThreshold) || | ||||||
2841 | SiblingsMultiplier > UnswitchThreshold) | ||||||
2842 | CostMultiplier = UnswitchThreshold; | ||||||
2843 | else | ||||||
2844 | CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower), | ||||||
2845 | (int)UnswitchThreshold); | ||||||
2846 | |||||||
2847 | LLVM_DEBUG(dbgs() << " Computed multiplier " << CostMultiplierdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << " Computed multiplier " << CostMultiplier << " (siblings " << SiblingsMultiplier << " * clones " << (1 << ClonesPower) << ")" << " for unswitch candidate: " << TI << "\n"; } } while (false) | ||||||
2848 | << " (siblings " << SiblingsMultiplier << " * clones "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << " Computed multiplier " << CostMultiplier << " (siblings " << SiblingsMultiplier << " * clones " << (1 << ClonesPower) << ")" << " for unswitch candidate: " << TI << "\n"; } } while (false) | ||||||
2849 | << (1 << ClonesPower) << ")"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << " Computed multiplier " << CostMultiplier << " (siblings " << SiblingsMultiplier << " * clones " << (1 << ClonesPower) << ")" << " for unswitch candidate: " << TI << "\n"; } } while (false) | ||||||
2850 | << " for unswitch candidate: " << TI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << " Computed multiplier " << CostMultiplier << " (siblings " << SiblingsMultiplier << " * clones " << (1 << ClonesPower) << ")" << " for unswitch candidate: " << TI << "\n"; } } while (false); | ||||||
2851 | return CostMultiplier; | ||||||
2852 | } | ||||||
2853 | |||||||
2854 | static bool collectUnswitchCandidates( | ||||||
2855 | SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates, | ||||||
2856 | IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, | ||||||
2857 | const Loop &L, const LoopInfo &LI, AAResults &AA, | ||||||
2858 | const MemorySSAUpdater *MSSAU) { | ||||||
2859 | assert(UnswitchCandidates.empty() && "Should be!")(static_cast <bool> (UnswitchCandidates.empty() && "Should be!") ? void (0) : __assert_fail ("UnswitchCandidates.empty() && \"Should be!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2859, __extension__ __PRETTY_FUNCTION__)); | ||||||
2860 | // Whether or not we should also collect guards in the loop. | ||||||
2861 | bool CollectGuards = false; | ||||||
2862 | if (UnswitchGuards) { | ||||||
2863 | auto *GuardDecl = L.getHeader()->getParent()->getParent()->getFunction( | ||||||
2864 | Intrinsic::getName(Intrinsic::experimental_guard)); | ||||||
2865 | if (GuardDecl && !GuardDecl->use_empty()) | ||||||
2866 | CollectGuards = true; | ||||||
2867 | } | ||||||
2868 | |||||||
2869 | for (auto *BB : L.blocks()) { | ||||||
2870 | if (LI.getLoopFor(BB) != &L) | ||||||
2871 | continue; | ||||||
2872 | |||||||
2873 | for (auto &I : *BB) { | ||||||
2874 | if (auto *SI = dyn_cast<SelectInst>(&I)) { | ||||||
2875 | auto *Cond = SI->getCondition(); | ||||||
2876 | if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond)) | ||||||
2877 | UnswitchCandidates.push_back({&I, {Cond}}); | ||||||
2878 | } else if (CollectGuards && isGuard(&I)) { | ||||||
2879 | auto *Cond = | ||||||
2880 | skipTrivialSelect(cast<IntrinsicInst>(&I)->getArgOperand(0)); | ||||||
2881 | // TODO: Support AND, OR conditions and partial unswitching. | ||||||
2882 | if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond)) | ||||||
2883 | UnswitchCandidates.push_back({&I, {Cond}}); | ||||||
2884 | } | ||||||
2885 | } | ||||||
2886 | |||||||
2887 | if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { | ||||||
2888 | // We can only consider fully loop-invariant switch conditions as we need | ||||||
2889 | // to completely eliminate the switch after unswitching. | ||||||
2890 | if (!isa<Constant>(SI->getCondition()) && | ||||||
2891 | L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor()) | ||||||
2892 | UnswitchCandidates.push_back({SI, {SI->getCondition()}}); | ||||||
2893 | continue; | ||||||
2894 | } | ||||||
2895 | |||||||
2896 | auto *BI = dyn_cast<BranchInst>(BB->getTerminator()); | ||||||
2897 | if (!BI || !BI->isConditional() || isa<Constant>(BI->getCondition()) || | ||||||
2898 | BI->getSuccessor(0) == BI->getSuccessor(1)) | ||||||
2899 | continue; | ||||||
2900 | |||||||
2901 | Value *Cond = skipTrivialSelect(BI->getCondition()); | ||||||
2902 | if (isa<Constant>(Cond)) | ||||||
2903 | continue; | ||||||
2904 | |||||||
2905 | if (L.isLoopInvariant(Cond)) { | ||||||
2906 | UnswitchCandidates.push_back({BI, {Cond}}); | ||||||
2907 | continue; | ||||||
2908 | } | ||||||
2909 | |||||||
2910 | Instruction &CondI = *cast<Instruction>(Cond); | ||||||
2911 | if (match(&CondI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()))) { | ||||||
2912 | TinyPtrVector<Value *> Invariants = | ||||||
2913 | collectHomogenousInstGraphLoopInvariants(L, CondI, LI); | ||||||
2914 | if (Invariants.empty()) | ||||||
2915 | continue; | ||||||
2916 | |||||||
2917 | UnswitchCandidates.push_back({BI, std::move(Invariants)}); | ||||||
2918 | continue; | ||||||
2919 | } | ||||||
2920 | } | ||||||
2921 | |||||||
2922 | if (MSSAU
| ||||||
2923 | !any_of(UnswitchCandidates, [&L](auto &TerminatorAndInvariants) { | ||||||
2924 | return TerminatorAndInvariants.TI == L.getHeader()->getTerminator(); | ||||||
2925 | })) { | ||||||
2926 | MemorySSA *MSSA = MSSAU->getMemorySSA(); | ||||||
2927 | if (auto Info = hasPartialIVCondition(L, MSSAThreshold, *MSSA, AA)) { | ||||||
2928 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << "simple-loop-unswitch: Found partially invariant condition " << *Info->InstToDuplicate[0] << "\n"; } } while (false) | ||||||
2929 | dbgs() << "simple-loop-unswitch: Found partially invariant condition "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << "simple-loop-unswitch: Found partially invariant condition " << *Info->InstToDuplicate[0] << "\n"; } } while (false) | ||||||
2930 | << *Info->InstToDuplicate[0] << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << "simple-loop-unswitch: Found partially invariant condition " << *Info->InstToDuplicate[0] << "\n"; } } while (false); | ||||||
2931 | PartialIVInfo = *Info; | ||||||
2932 | PartialIVCondBranch = L.getHeader()->getTerminator(); | ||||||
2933 | TinyPtrVector<Value *> ValsToDuplicate; | ||||||
2934 | llvm::append_range(ValsToDuplicate, Info->InstToDuplicate); | ||||||
2935 | UnswitchCandidates.push_back( | ||||||
2936 | {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)}); | ||||||
2937 | } | ||||||
2938 | } | ||||||
2939 | return !UnswitchCandidates.empty(); | ||||||
2940 | } | ||||||
2941 | |||||||
2942 | /// Tries to canonicalize condition described by: | ||||||
2943 | /// | ||||||
2944 | /// br (LHS pred RHS), label IfTrue, label IfFalse | ||||||
2945 | /// | ||||||
2946 | /// into its equivalent where `Pred` is something that we support for injected | ||||||
2947 | /// invariants (so far it is limited to ult), LHS in canonicalized form is | ||||||
2948 | /// non-invariant and RHS is an invariant. | ||||||
2949 | static void canonicalizeForInvariantConditionInjection( | ||||||
2950 | ICmpInst::Predicate &Pred, Value *&LHS, Value *&RHS, BasicBlock *&IfTrue, | ||||||
2951 | BasicBlock *&IfFalse, const Loop &L) { | ||||||
2952 | if (!L.contains(IfTrue)) { | ||||||
2953 | Pred = ICmpInst::getInversePredicate(Pred); | ||||||
2954 | std::swap(IfTrue, IfFalse); | ||||||
2955 | } | ||||||
2956 | |||||||
2957 | // Move loop-invariant argument to RHS position. | ||||||
2958 | if (L.isLoopInvariant(LHS)) { | ||||||
2959 | Pred = ICmpInst::getSwappedPredicate(Pred); | ||||||
2960 | std::swap(LHS, RHS); | ||||||
2961 | } | ||||||
2962 | |||||||
2963 | if (Pred == ICmpInst::ICMP_SGE && match(RHS, m_Zero())) { | ||||||
2964 | // Turn "x >=s 0" into "x <u UMIN_INT" | ||||||
2965 | Pred = ICmpInst::ICMP_ULT; | ||||||
2966 | RHS = ConstantInt::get( | ||||||
2967 | RHS->getContext(), | ||||||
2968 | APInt::getSignedMinValue(RHS->getType()->getIntegerBitWidth())); | ||||||
2969 | } | ||||||
2970 | } | ||||||
2971 | |||||||
2972 | /// Returns true, if predicate described by ( \p Pred, \p LHS, \p RHS ) | ||||||
2973 | /// succeeding into blocks ( \p IfTrue, \p IfFalse) can be optimized by | ||||||
2974 | /// injecting a loop-invariant condition. | ||||||
2975 | static bool shouldTryInjectInvariantCondition( | ||||||
2976 | const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS, | ||||||
2977 | const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L) { | ||||||
2978 | if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS)) | ||||||
2979 | return false; | ||||||
2980 | // TODO: Support other predicates. | ||||||
2981 | if (Pred != ICmpInst::ICMP_ULT) | ||||||
2982 | return false; | ||||||
2983 | // TODO: Support non-loop-exiting branches? | ||||||
2984 | if (!L.contains(IfTrue) || L.contains(IfFalse)) | ||||||
2985 | return false; | ||||||
2986 | // FIXME: For some reason this causes problems with MSSA updates, need to | ||||||
2987 | // investigate why. So far, just don't unswitch latch. | ||||||
2988 | if (L.getHeader() == IfTrue) | ||||||
2989 | return false; | ||||||
2990 | return true; | ||||||
2991 | } | ||||||
2992 | |||||||
2993 | /// Returns true, if metadata on \p BI allows us to optimize branching into \p | ||||||
2994 | /// TakenSucc via injection of invariant conditions. The branch should be not | ||||||
2995 | /// enough and not previously unswitched, the information about this comes from | ||||||
2996 | /// the metadata. | ||||||
2997 | bool shouldTryInjectBasingOnMetadata(const BranchInst *BI, | ||||||
2998 | const BasicBlock *TakenSucc) { | ||||||
2999 | // Skip branches that have already been unswithed this way. After successful | ||||||
3000 | // unswitching of injected condition, we will still have a copy of this loop | ||||||
3001 | // which looks exactly the same as original one. To prevent the 2nd attempt | ||||||
3002 | // of unswitching it in the same pass, mark this branch as "nothing to do | ||||||
3003 | // here". | ||||||
3004 | if (BI->hasMetadata("llvm.invariant.condition.injection.disabled")) | ||||||
3005 | return false; | ||||||
3006 | SmallVector<uint32_t> Weights; | ||||||
3007 | if (!extractBranchWeights(*BI, Weights)) | ||||||
3008 | return false; | ||||||
3009 | unsigned T = InjectInvariantConditionHotnesThreshold; | ||||||
3010 | BranchProbability LikelyTaken(T - 1, T); | ||||||
3011 | |||||||
3012 | assert(Weights.size() == 2 && "Unexpected profile data!")(static_cast <bool> (Weights.size() == 2 && "Unexpected profile data!" ) ? void (0) : __assert_fail ("Weights.size() == 2 && \"Unexpected profile data!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3012, __extension__ __PRETTY_FUNCTION__)); | ||||||
3013 | size_t Idx = BI->getSuccessor(0) == TakenSucc ? 0 : 1; | ||||||
3014 | auto Num = Weights[Idx]; | ||||||
3015 | auto Denom = Weights[0] + Weights[1]; | ||||||
3016 | // Degenerate or overflowed metadata. | ||||||
3017 | if (Denom == 0 || Num > Denom) | ||||||
3018 | return false; | ||||||
3019 | BranchProbability ActualTaken(Num, Denom); | ||||||
3020 | if (LikelyTaken > ActualTaken) | ||||||
3021 | return false; | ||||||
3022 | return true; | ||||||
3023 | } | ||||||
3024 | |||||||
3025 | /// Materialize pending invariant condition of the given candidate into IR. The | ||||||
3026 | /// injected loop-invariant condition implies the original loop-variant branch | ||||||
3027 | /// condition, so the materialization turns | ||||||
3028 | /// | ||||||
3029 | /// loop_block: | ||||||
3030 | /// ... | ||||||
3031 | /// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc | ||||||
3032 | /// | ||||||
3033 | /// into | ||||||
3034 | /// | ||||||
3035 | /// preheader: | ||||||
3036 | /// %invariant_cond = LHS pred RHS | ||||||
3037 | /// ... | ||||||
3038 | /// loop_block: | ||||||
3039 | /// br i1 %invariant_cond, label InLoopSucc, label OriginalCheck | ||||||
3040 | /// OriginalCheck: | ||||||
3041 | /// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc | ||||||
3042 | /// ... | ||||||
3043 | static NonTrivialUnswitchCandidate | ||||||
3044 | injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L, | ||||||
3045 | DominatorTree &DT, LoopInfo &LI, | ||||||
3046 | AssumptionCache &AC, MemorySSAUpdater *MSSAU) { | ||||||
3047 | assert(Candidate.hasPendingInjection() && "Nothing to inject!")(static_cast <bool> (Candidate.hasPendingInjection() && "Nothing to inject!") ? void (0) : __assert_fail ("Candidate.hasPendingInjection() && \"Nothing to inject!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3047, __extension__ __PRETTY_FUNCTION__)); | ||||||
3048 | BasicBlock *Preheader = L.getLoopPreheader(); | ||||||
3049 | assert(Preheader && "Loop is not in simplified form?")(static_cast <bool> (Preheader && "Loop is not in simplified form?" ) ? void (0) : __assert_fail ("Preheader && \"Loop is not in simplified form?\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3049, __extension__ __PRETTY_FUNCTION__)); | ||||||
3050 | assert(LI.getLoopFor(Candidate.TI->getParent()) == &L &&(static_cast <bool> (LI.getLoopFor(Candidate.TI->getParent ()) == &L && "Unswitching branch of inner loop!") ? void (0) : __assert_fail ("LI.getLoopFor(Candidate.TI->getParent()) == &L && \"Unswitching branch of inner loop!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3051, __extension__ __PRETTY_FUNCTION__)) | ||||||
3051 | "Unswitching branch of inner loop!")(static_cast <bool> (LI.getLoopFor(Candidate.TI->getParent ()) == &L && "Unswitching branch of inner loop!") ? void (0) : __assert_fail ("LI.getLoopFor(Candidate.TI->getParent()) == &L && \"Unswitching branch of inner loop!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3051, __extension__ __PRETTY_FUNCTION__)); | ||||||
3052 | |||||||
3053 | auto Pred = Candidate.PendingInjection->Pred; | ||||||
3054 | auto *LHS = Candidate.PendingInjection->LHS; | ||||||
3055 | auto *RHS = Candidate.PendingInjection->RHS; | ||||||
3056 | auto *InLoopSucc = Candidate.PendingInjection->InLoopSucc; | ||||||
3057 | auto *TI = cast<BranchInst>(Candidate.TI); | ||||||
3058 | auto *BB = Candidate.TI->getParent(); | ||||||
3059 | auto *OutOfLoopSucc = InLoopSucc == TI->getSuccessor(0) ? TI->getSuccessor(1) | ||||||
3060 | : TI->getSuccessor(0); | ||||||
3061 | // FIXME: Remove this once limitation on successors is lifted. | ||||||
3062 | assert(L.contains(InLoopSucc) && "Not supported yet!")(static_cast <bool> (L.contains(InLoopSucc) && "Not supported yet!" ) ? void (0) : __assert_fail ("L.contains(InLoopSucc) && \"Not supported yet!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3062, __extension__ __PRETTY_FUNCTION__)); | ||||||
3063 | assert(!L.contains(OutOfLoopSucc) && "Not supported yet!")(static_cast <bool> (!L.contains(OutOfLoopSucc) && "Not supported yet!") ? void (0) : __assert_fail ("!L.contains(OutOfLoopSucc) && \"Not supported yet!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3063, __extension__ __PRETTY_FUNCTION__)); | ||||||
3064 | auto &Ctx = BB->getContext(); | ||||||
3065 | |||||||
3066 | IRBuilder<> Builder(Preheader->getTerminator()); | ||||||
3067 | assert(ICmpInst::isUnsigned(Pred) && "Not supported yet!")(static_cast <bool> (ICmpInst::isUnsigned(Pred) && "Not supported yet!") ? void (0) : __assert_fail ("ICmpInst::isUnsigned(Pred) && \"Not supported yet!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3067, __extension__ __PRETTY_FUNCTION__)); | ||||||
3068 | if (LHS->getType() != RHS->getType()) { | ||||||
3069 | if (LHS->getType()->getIntegerBitWidth() < | ||||||
3070 | RHS->getType()->getIntegerBitWidth()) | ||||||
3071 | LHS = Builder.CreateZExt(LHS, RHS->getType(), LHS->getName() + ".wide"); | ||||||
3072 | else | ||||||
3073 | RHS = Builder.CreateZExt(RHS, LHS->getType(), RHS->getName() + ".wide"); | ||||||
3074 | } | ||||||
3075 | // Do not use builder here: CreateICmp may simplify this into a constant and | ||||||
3076 | // unswitching will break. Better optimize it away later. | ||||||
3077 | auto *InjectedCond = | ||||||
3078 | ICmpInst::Create(Instruction::ICmp, Pred, LHS, RHS, "injected.cond", | ||||||
3079 | Preheader->getTerminator()); | ||||||
3080 | auto *OldCond = TI->getCondition(); | ||||||
3081 | |||||||
3082 | BasicBlock *CheckBlock = BasicBlock::Create(Ctx, BB->getName() + ".check", | ||||||
3083 | BB->getParent(), InLoopSucc); | ||||||
3084 | Builder.SetInsertPoint(TI); | ||||||
3085 | auto *InvariantBr = | ||||||
3086 | Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock); | ||||||
3087 | |||||||
3088 | Builder.SetInsertPoint(CheckBlock); | ||||||
3089 | auto *NewTerm = Builder.CreateCondBr(OldCond, InLoopSucc, OutOfLoopSucc); | ||||||
3090 | |||||||
3091 | TI->eraseFromParent(); | ||||||
3092 | // Prevent infinite unswitching. | ||||||
3093 | NewTerm->setMetadata("llvm.invariant.condition.injection.disabled", | ||||||
3094 | MDNode::get(BB->getContext(), {})); | ||||||
3095 | |||||||
3096 | // Fixup phis. | ||||||
3097 | for (auto &I : *InLoopSucc) { | ||||||
3098 | auto *PN = dyn_cast<PHINode>(&I); | ||||||
3099 | if (!PN) | ||||||
3100 | break; | ||||||
3101 | auto *Inc = PN->getIncomingValueForBlock(BB); | ||||||
3102 | PN->addIncoming(Inc, CheckBlock); | ||||||
3103 | } | ||||||
3104 | OutOfLoopSucc->replacePhiUsesWith(BB, CheckBlock); | ||||||
3105 | |||||||
3106 | SmallVector<DominatorTree::UpdateType, 4> DTUpdates = { | ||||||
3107 | { DominatorTree::Insert, BB, CheckBlock }, | ||||||
3108 | { DominatorTree::Insert, CheckBlock, InLoopSucc }, | ||||||
3109 | { DominatorTree::Insert, CheckBlock, OutOfLoopSucc }, | ||||||
3110 | { DominatorTree::Delete, BB, OutOfLoopSucc } | ||||||
3111 | }; | ||||||
3112 | |||||||
3113 | DT.applyUpdates(DTUpdates); | ||||||
3114 | if (MSSAU) | ||||||
3115 | MSSAU->applyUpdates(DTUpdates, DT); | ||||||
3116 | L.addBasicBlockToLoop(CheckBlock, LI); | ||||||
3117 | |||||||
3118 | #ifndef NDEBUG | ||||||
3119 | DT.verify(); | ||||||
3120 | LI.verify(DT); | ||||||
3121 | if (MSSAU && VerifyMemorySSA) | ||||||
3122 | MSSAU->getMemorySSA()->verifyMemorySSA(); | ||||||
3123 | #endif | ||||||
3124 | |||||||
3125 | // TODO: In fact, cost of unswitching a new invariant candidate is *slightly* | ||||||
3126 | // higher because we have just inserted a new block. Need to think how to | ||||||
3127 | // adjust the cost of injected candidates when it was first computed. | ||||||
3128 | LLVM_DEBUG(dbgs() << "Injected a new loop-invariant branch " << *InvariantBrdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << "Injected a new loop-invariant branch " << *InvariantBr << " and considering it for unswitching." ; } } while (false) | ||||||
3129 | << " and considering it for unswitching.")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << "Injected a new loop-invariant branch " << *InvariantBr << " and considering it for unswitching." ; } } while (false); | ||||||
3130 | ++NumInvariantConditionsInjected; | ||||||
3131 | return NonTrivialUnswitchCandidate(InvariantBr, { InjectedCond }, | ||||||
3132 | Candidate.Cost); | ||||||
3133 | } | ||||||
3134 | |||||||
3135 | /// Given chain of loop branch conditions looking like: | ||||||
3136 | /// br (Variant < Invariant1) | ||||||
3137 | /// br (Variant < Invariant2) | ||||||
3138 | /// br (Variant < Invariant3) | ||||||
3139 | /// ... | ||||||
3140 | /// collect set of invariant conditions on which we want to unswitch, which | ||||||
3141 | /// look like: | ||||||
3142 | /// Invariant1 <= Invariant2 | ||||||
3143 | /// Invariant2 <= Invariant3 | ||||||
3144 | /// ... | ||||||
3145 | /// Though they might not immediately exist in the IR, we can still inject them. | ||||||
3146 | static bool insertCandidatesWithPendingInjections( | ||||||
3147 | SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates, Loop &L, | ||||||
3148 | ICmpInst::Predicate Pred, ArrayRef<CompareDesc> Compares, | ||||||
3149 | const DominatorTree &DT) { | ||||||
3150 | |||||||
3151 | assert(ICmpInst::isRelational(Pred))(static_cast <bool> (ICmpInst::isRelational(Pred)) ? void (0) : __assert_fail ("ICmpInst::isRelational(Pred)", "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp" , 3151, __extension__ __PRETTY_FUNCTION__)); | ||||||
3152 | assert(ICmpInst::isStrictPredicate(Pred))(static_cast <bool> (ICmpInst::isStrictPredicate(Pred)) ? void (0) : __assert_fail ("ICmpInst::isStrictPredicate(Pred)" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3152, __extension__ __PRETTY_FUNCTION__)); | ||||||
3153 | if (Compares.size() < 2) | ||||||
3154 | return false; | ||||||
3155 | ICmpInst::Predicate NonStrictPred = ICmpInst::getNonStrictPredicate(Pred); | ||||||
3156 | for (auto Prev = Compares.begin(), Next = Compares.begin() + 1; | ||||||
3157 | Next != Compares.end(); ++Prev, ++Next) { | ||||||
3158 | Value *LHS = Next->Invariant; | ||||||
3159 | Value *RHS = Prev->Invariant; | ||||||
3160 | BasicBlock *InLoopSucc = Prev->InLoopSucc; | ||||||
3161 | InjectedInvariant ToInject(NonStrictPred, LHS, RHS, InLoopSucc); | ||||||
3162 | NonTrivialUnswitchCandidate Candidate(Prev->Term, { LHS, RHS }, | ||||||
3163 | std::nullopt, std::move(ToInject)); | ||||||
3164 | UnswitchCandidates.push_back(std::move(Candidate)); | ||||||
3165 | } | ||||||
3166 | return true; | ||||||
3167 | } | ||||||
3168 | |||||||
3169 | /// Collect unswitch candidates by invariant conditions that are not immediately | ||||||
3170 | /// present in the loop. However, they can be injected into the code if we | ||||||
3171 | /// decide it's profitable. | ||||||
3172 | /// An example of such conditions is following: | ||||||
3173 | /// | ||||||
3174 | /// for (...) { | ||||||
3175 | /// x = load ... | ||||||
3176 | /// if (! x <u C1) break; | ||||||
3177 | /// if (! x <u C2) break; | ||||||
3178 | /// <do something> | ||||||
3179 | /// } | ||||||
3180 | /// | ||||||
3181 | /// We can unswitch by condition "C1 <=u C2". If that is true, then "x <u C1 <= | ||||||
3182 | /// C2" automatically implies "x <u C2", so we can get rid of one of | ||||||
3183 | /// loop-variant checks in unswitched loop version. | ||||||
3184 | static bool collectUnswitchCandidatesWithInjections( | ||||||
3185 | SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates, | ||||||
3186 | IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L, | ||||||
3187 | const DominatorTree &DT, const LoopInfo &LI, AAResults &AA, | ||||||
3188 | const MemorySSAUpdater *MSSAU) { | ||||||
3189 | if (!InjectInvariantConditions) | ||||||
3190 | return false; | ||||||
3191 | |||||||
3192 | if (!DT.isReachableFromEntry(L.getHeader())) | ||||||
3193 | return false; | ||||||
3194 | auto *Latch = L.getLoopLatch(); | ||||||
3195 | // Need to have a single latch and a preheader. | ||||||
3196 | if (!Latch) | ||||||
3197 | return false; | ||||||
3198 | assert(L.getLoopPreheader() && "Must have a preheader!")(static_cast <bool> (L.getLoopPreheader() && "Must have a preheader!" ) ? void (0) : __assert_fail ("L.getLoopPreheader() && \"Must have a preheader!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3198, __extension__ __PRETTY_FUNCTION__)); | ||||||
3199 | |||||||
3200 | DenseMap<Value *, SmallVector<CompareDesc, 4> > CandidatesULT; | ||||||
3201 | // Traverse the conditions that dominate latch (and therefore dominate each | ||||||
3202 | // other). | ||||||
3203 | for (auto *DTN = DT.getNode(Latch); L.contains(DTN->getBlock()); | ||||||
3204 | DTN = DTN->getIDom()) { | ||||||
3205 | ICmpInst::Predicate Pred; | ||||||
3206 | Value *LHS = nullptr, *RHS = nullptr; | ||||||
3207 | BasicBlock *IfTrue = nullptr, *IfFalse = nullptr; | ||||||
3208 | auto *BB = DTN->getBlock(); | ||||||
3209 | // Ignore inner loops. | ||||||
3210 | if (LI.getLoopFor(BB) != &L) | ||||||
3211 | continue; | ||||||
3212 | auto *Term = BB->getTerminator(); | ||||||
3213 | if (!match(Term, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), | ||||||
3214 | m_BasicBlock(IfTrue), m_BasicBlock(IfFalse)))) | ||||||
3215 | continue; | ||||||
3216 | if (!LHS->getType()->isIntegerTy()) | ||||||
3217 | continue; | ||||||
3218 | canonicalizeForInvariantConditionInjection(Pred, LHS, RHS, IfTrue, IfFalse, | ||||||
3219 | L); | ||||||
3220 | if (!shouldTryInjectInvariantCondition(Pred, LHS, RHS, IfTrue, IfFalse, L)) | ||||||
3221 | continue; | ||||||
3222 | if (!shouldTryInjectBasingOnMetadata(cast<BranchInst>(Term), IfTrue)) | ||||||
3223 | continue; | ||||||
3224 | // Strip ZEXT for unsigned predicate. | ||||||
3225 | // TODO: once signed predicates are supported, also strip SEXT. | ||||||
3226 | CompareDesc Desc(cast<BranchInst>(Term), RHS, IfTrue); | ||||||
3227 | while (auto *Zext = dyn_cast<ZExtInst>(LHS)) | ||||||
3228 | LHS = Zext->getOperand(0); | ||||||
3229 | CandidatesULT[LHS].push_back(Desc); | ||||||
3230 | } | ||||||
3231 | |||||||
3232 | bool Found = false; | ||||||
3233 | for (auto &It : CandidatesULT) | ||||||
3234 | Found |= insertCandidatesWithPendingInjections( | ||||||
3235 | UnswitchCandidates, L, ICmpInst::ICMP_ULT, It.second, DT); | ||||||
3236 | return Found; | ||||||
3237 | } | ||||||
3238 | |||||||
3239 | static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI) { | ||||||
3240 | if (!L.isSafeToClone()) | ||||||
3241 | return false; | ||||||
3242 | for (auto *BB : L.blocks()) | ||||||
3243 | for (auto &I : *BB) { | ||||||
3244 | if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB)) | ||||||
3245 | return false; | ||||||
3246 | if (auto *CB = dyn_cast<CallBase>(&I)) { | ||||||
3247 | assert(!CB->cannotDuplicate() && "Checked by L.isSafeToClone().")(static_cast <bool> (!CB->cannotDuplicate() && "Checked by L.isSafeToClone().") ? void (0) : __assert_fail ( "!CB->cannotDuplicate() && \"Checked by L.isSafeToClone().\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3247, __extension__ __PRETTY_FUNCTION__)); | ||||||
3248 | if (CB->isConvergent()) | ||||||
3249 | return false; | ||||||
3250 | } | ||||||
3251 | } | ||||||
3252 | |||||||
3253 | // Check if there are irreducible CFG cycles in this loop. If so, we cannot | ||||||
3254 | // easily unswitch non-trivial edges out of the loop. Doing so might turn the | ||||||
3255 | // irreducible control flow into reducible control flow and introduce new | ||||||
3256 | // loops "out of thin air". If we ever discover important use cases for doing | ||||||
3257 | // this, we can add support to loop unswitch, but it is a lot of complexity | ||||||
3258 | // for what seems little or no real world benefit. | ||||||
3259 | LoopBlocksRPO RPOT(&L); | ||||||
3260 | RPOT.perform(&LI); | ||||||
3261 | if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI)) | ||||||
3262 | return false; | ||||||
3263 | |||||||
3264 | SmallVector<BasicBlock *, 4> ExitBlocks; | ||||||
3265 | L.getUniqueExitBlocks(ExitBlocks); | ||||||
3266 | // We cannot unswitch if exit blocks contain a cleanuppad/catchswitch | ||||||
3267 | // instruction as we don't know how to split those exit blocks. | ||||||
3268 | // FIXME: We should teach SplitBlock to handle this and remove this | ||||||
3269 | // restriction. | ||||||
3270 | for (auto *ExitBB : ExitBlocks) { | ||||||
3271 | auto *I = ExitBB->getFirstNonPHI(); | ||||||
3272 | if (isa<CleanupPadInst>(I) || isa<CatchSwitchInst>(I)) { | ||||||
3273 | LLVM_DEBUG(dbgs() << "Cannot unswitch because of cleanuppad/catchswitch "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << "Cannot unswitch because of cleanuppad/catchswitch " "in exit block\n"; } } while (false) | ||||||
3274 | "in exit block\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << "Cannot unswitch because of cleanuppad/catchswitch " "in exit block\n"; } } while (false); | ||||||
3275 | return false; | ||||||
3276 | } | ||||||
3277 | } | ||||||
3278 | |||||||
3279 | return true; | ||||||
3280 | } | ||||||
3281 | |||||||
3282 | static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate( | ||||||
3283 | ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates, const Loop &L, | ||||||
3284 | const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC, | ||||||
3285 | const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo) { | ||||||
3286 | // Given that unswitching these terminators will require duplicating parts of | ||||||
3287 | // the loop, so we need to be able to model that cost. Compute the ephemeral | ||||||
3288 | // values and set up a data structure to hold per-BB costs. We cache each | ||||||
3289 | // block's cost so that we don't recompute this when considering different | ||||||
3290 | // subsets of the loop for duplication during unswitching. | ||||||
3291 | SmallPtrSet<const Value *, 4> EphValues; | ||||||
3292 | CodeMetrics::collectEphemeralValues(&L, &AC, EphValues); | ||||||
3293 | SmallDenseMap<BasicBlock *, InstructionCost, 4> BBCostMap; | ||||||
3294 | |||||||
3295 | // Compute the cost of each block, as well as the total loop cost. Also, bail | ||||||
3296 | // out if we see instructions which are incompatible with loop unswitching | ||||||
3297 | // (convergent, noduplicate, or cross-basic-block tokens). | ||||||
3298 | // FIXME: We might be able to safely handle some of these in non-duplicated | ||||||
3299 | // regions. | ||||||
3300 | TargetTransformInfo::TargetCostKind CostKind = | ||||||
3301 | L.getHeader()->getParent()->hasMinSize() | ||||||
3302 | ? TargetTransformInfo::TCK_CodeSize | ||||||
3303 | : TargetTransformInfo::TCK_SizeAndLatency; | ||||||
3304 | InstructionCost LoopCost = 0; | ||||||
3305 | for (auto *BB : L.blocks()) { | ||||||
3306 | InstructionCost Cost = 0; | ||||||
3307 | for (auto &I : *BB) { | ||||||
3308 | if (EphValues.count(&I)) | ||||||
3309 | continue; | ||||||
3310 | Cost += TTI.getInstructionCost(&I, CostKind); | ||||||
3311 | } | ||||||
3312 | assert(Cost >= 0 && "Must not have negative costs!")(static_cast <bool> (Cost >= 0 && "Must not have negative costs!" ) ? void (0) : __assert_fail ("Cost >= 0 && \"Must not have negative costs!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3312, __extension__ __PRETTY_FUNCTION__)); | ||||||
3313 | LoopCost += Cost; | ||||||
3314 | assert(LoopCost >= 0 && "Must not have negative loop costs!")(static_cast <bool> (LoopCost >= 0 && "Must not have negative loop costs!" ) ? void (0) : __assert_fail ("LoopCost >= 0 && \"Must not have negative loop costs!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3314, __extension__ __PRETTY_FUNCTION__)); | ||||||
3315 | BBCostMap[BB] = Cost; | ||||||
3316 | } | ||||||
3317 | LLVM_DEBUG(dbgs() << " Total loop cost: " << LoopCost << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << " Total loop cost: " << LoopCost << "\n"; } } while (false); | ||||||
3318 | |||||||
3319 | // Now we find the best candidate by searching for the one with the following | ||||||
3320 | // properties in order: | ||||||
3321 | // | ||||||
3322 | // 1) An unswitching cost below the threshold | ||||||
3323 | // 2) The smallest number of duplicated unswitch candidates (to avoid | ||||||
3324 | // creating redundant subsequent unswitching) | ||||||
3325 | // 3) The smallest cost after unswitching. | ||||||
3326 | // | ||||||
3327 | // We prioritize reducing fanout of unswitch candidates provided the cost | ||||||
3328 | // remains below the threshold because this has a multiplicative effect. | ||||||
3329 | // | ||||||
3330 | // This requires memoizing each dominator subtree to avoid redundant work. | ||||||
3331 | // | ||||||
3332 | // FIXME: Need to actually do the number of candidates part above. | ||||||
3333 | SmallDenseMap<DomTreeNode *, InstructionCost, 4> DTCostMap; | ||||||
3334 | // Given a terminator which might be unswitched, computes the non-duplicated | ||||||
3335 | // cost for that terminator. | ||||||
3336 | auto ComputeUnswitchedCost = [&](Instruction &TI, | ||||||
3337 | bool FullUnswitch) -> InstructionCost { | ||||||
3338 | BasicBlock &BB = *TI.getParent(); | ||||||
3339 | SmallPtrSet<BasicBlock *, 4> Visited; | ||||||
3340 | |||||||
3341 | InstructionCost Cost = 0; | ||||||
3342 | for (BasicBlock *SuccBB : successors(&BB)) { | ||||||
3343 | // Don't count successors more than once. | ||||||
3344 | if (!Visited.insert(SuccBB).second) | ||||||
3345 | continue; | ||||||
3346 | |||||||
3347 | // If this is a partial unswitch candidate, then it must be a conditional | ||||||
3348 | // branch with a condition of either `or`, `and`, their corresponding | ||||||
3349 | // select forms or partially invariant instructions. In that case, one of | ||||||
3350 | // the successors is necessarily duplicated, so don't even try to remove | ||||||
3351 | // its cost. | ||||||
3352 | if (!FullUnswitch
| ||||||
3353 | auto &BI = cast<BranchInst>(TI); | ||||||
3354 | Value *Cond = skipTrivialSelect(BI.getCondition()); | ||||||
3355 | if (match(Cond, m_LogicalAnd())) { | ||||||
3356 | if (SuccBB == BI.getSuccessor(1)) | ||||||
3357 | continue; | ||||||
3358 | } else if (match(Cond, m_LogicalOr())) { | ||||||
3359 | if (SuccBB == BI.getSuccessor(0)) | ||||||
3360 | continue; | ||||||
3361 | } else if ((PartialIVInfo.KnownValue->isOneValue() && | ||||||
| |||||||
3362 | SuccBB == BI.getSuccessor(0)) || | ||||||
3363 | (!PartialIVInfo.KnownValue->isOneValue() && | ||||||
3364 | SuccBB == BI.getSuccessor(1))) | ||||||
3365 | continue; | ||||||
3366 | } | ||||||
3367 | |||||||
3368 | // This successor's domtree will not need to be duplicated after | ||||||
3369 | // unswitching if the edge to the successor dominates it (and thus the | ||||||
3370 | // entire tree). This essentially means there is no other path into this | ||||||
3371 | // subtree and so it will end up live in only one clone of the loop. | ||||||
3372 | if (SuccBB->getUniquePredecessor() || | ||||||
3373 | llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) { | ||||||
3374 | return PredBB == &BB || DT.dominates(SuccBB, PredBB); | ||||||
3375 | })) { | ||||||
3376 | Cost += computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap); | ||||||
3377 | assert(Cost <= LoopCost &&(static_cast <bool> (Cost <= LoopCost && "Non-duplicated cost should never exceed total loop cost!" ) ? void (0) : __assert_fail ("Cost <= LoopCost && \"Non-duplicated cost should never exceed total loop cost!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3378, __extension__ __PRETTY_FUNCTION__)) | ||||||
3378 | "Non-duplicated cost should never exceed total loop cost!")(static_cast <bool> (Cost <= LoopCost && "Non-duplicated cost should never exceed total loop cost!" ) ? void (0) : __assert_fail ("Cost <= LoopCost && \"Non-duplicated cost should never exceed total loop cost!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3378, __extension__ __PRETTY_FUNCTION__)); | ||||||
3379 | } | ||||||
3380 | } | ||||||
3381 | |||||||
3382 | // Now scale the cost by the number of unique successors minus one. We | ||||||
3383 | // subtract one because there is already at least one copy of the entire | ||||||
3384 | // loop. This is computing the new cost of unswitching a condition. | ||||||
3385 | // Note that guards always have 2 unique successors that are implicit and | ||||||
3386 | // will be materialized if we decide to unswitch it. | ||||||
3387 | int SuccessorsCount = | ||||||
3388 | isGuard(&TI) || isa<SelectInst>(TI) ? 2 : Visited.size(); | ||||||
3389 | assert(SuccessorsCount > 1 &&(static_cast <bool> (SuccessorsCount > 1 && "Cannot unswitch a condition without multiple distinct successors!" ) ? void (0) : __assert_fail ("SuccessorsCount > 1 && \"Cannot unswitch a condition without multiple distinct successors!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3390, __extension__ __PRETTY_FUNCTION__)) | ||||||
3390 | "Cannot unswitch a condition without multiple distinct successors!")(static_cast <bool> (SuccessorsCount > 1 && "Cannot unswitch a condition without multiple distinct successors!" ) ? void (0) : __assert_fail ("SuccessorsCount > 1 && \"Cannot unswitch a condition without multiple distinct successors!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3390, __extension__ __PRETTY_FUNCTION__)); | ||||||
3391 | return (LoopCost - Cost) * (SuccessorsCount - 1); | ||||||
3392 | }; | ||||||
3393 | |||||||
3394 | std::optional<NonTrivialUnswitchCandidate> Best; | ||||||
3395 | for (auto &Candidate : UnswitchCandidates) { | ||||||
3396 | Instruction &TI = *Candidate.TI; | ||||||
3397 | ArrayRef<Value *> Invariants = Candidate.Invariants; | ||||||
3398 | BranchInst *BI = dyn_cast<BranchInst>(&TI); | ||||||
3399 | bool FullUnswitch = | ||||||
3400 | !BI
| ||||||
3401 | (Invariants.size() == 1 && | ||||||
3402 | Invariants[0] == skipTrivialSelect(BI->getCondition())); | ||||||
3403 | InstructionCost CandidateCost = ComputeUnswitchedCost(TI, FullUnswitch); | ||||||
3404 | // Calculate cost multiplier which is a tool to limit potentially | ||||||
3405 | // exponential behavior of loop-unswitch. | ||||||
3406 | if (EnableUnswitchCostMultiplier) { | ||||||
3407 | int CostMultiplier = | ||||||
3408 | CalculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates); | ||||||
3409 | assert((static_cast <bool> ((CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) && "cost multiplier needs to be in the range of 1..UnswitchThreshold" ) ? void (0) : __assert_fail ("(CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) && \"cost multiplier needs to be in the range of 1..UnswitchThreshold\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3411, __extension__ __PRETTY_FUNCTION__)) | ||||||
3410 | (CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) &&(static_cast <bool> ((CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) && "cost multiplier needs to be in the range of 1..UnswitchThreshold" ) ? void (0) : __assert_fail ("(CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) && \"cost multiplier needs to be in the range of 1..UnswitchThreshold\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3411, __extension__ __PRETTY_FUNCTION__)) | ||||||
3411 | "cost multiplier needs to be in the range of 1..UnswitchThreshold")(static_cast <bool> ((CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) && "cost multiplier needs to be in the range of 1..UnswitchThreshold" ) ? void (0) : __assert_fail ("(CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) && \"cost multiplier needs to be in the range of 1..UnswitchThreshold\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3411, __extension__ __PRETTY_FUNCTION__)); | ||||||
3412 | CandidateCost *= CostMultiplier; | ||||||
3413 | LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCostdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << " Computed cost of " << CandidateCost << " (multiplier: " << CostMultiplier << ")" << " for unswitch candidate: " << TI << "\n"; } } while (false) | ||||||
3414 | << " (multiplier: " << CostMultiplier << ")"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << " Computed cost of " << CandidateCost << " (multiplier: " << CostMultiplier << ")" << " for unswitch candidate: " << TI << "\n"; } } while (false) | ||||||
3415 | << " for unswitch candidate: " << TI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << " Computed cost of " << CandidateCost << " (multiplier: " << CostMultiplier << ")" << " for unswitch candidate: " << TI << "\n"; } } while (false); | ||||||
3416 | } else { | ||||||
3417 | LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCostdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << " Computed cost of " << CandidateCost << " for unswitch candidate: " << TI << "\n"; } } while (false) | ||||||
3418 | << " for unswitch candidate: " << TI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << " Computed cost of " << CandidateCost << " for unswitch candidate: " << TI << "\n"; } } while (false); | ||||||
3419 | } | ||||||
3420 | |||||||
3421 | if (!Best || CandidateCost < Best->Cost) { | ||||||
3422 | Best = Candidate; | ||||||
3423 | Best->Cost = CandidateCost; | ||||||
3424 | } | ||||||
3425 | } | ||||||
3426 | assert(Best && "Must be!")(static_cast <bool> (Best && "Must be!") ? void (0) : __assert_fail ("Best && \"Must be!\"", "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp" , 3426, __extension__ __PRETTY_FUNCTION__)); | ||||||
3427 | return *Best; | ||||||
3428 | } | ||||||
3429 | |||||||
3430 | // Insert a freeze on an unswitched branch if all is true: | ||||||
3431 | // 1. freeze-loop-unswitch-cond option is true | ||||||
3432 | // 2. The branch may not execute in the loop pre-transformation. If a branch may | ||||||
3433 | // not execute and could cause UB, it would always cause UB if it is hoisted outside | ||||||
3434 | // of the loop. Insert a freeze to prevent this case. | ||||||
3435 | // 3. The branch condition may be poison or undef | ||||||
3436 | static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT, | ||||||
3437 | AssumptionCache &AC) { | ||||||
3438 | assert(isa<BranchInst>(TI) || isa<SwitchInst>(TI))(static_cast <bool> (isa<BranchInst>(TI) || isa< SwitchInst>(TI)) ? void (0) : __assert_fail ("isa<BranchInst>(TI) || isa<SwitchInst>(TI)" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3438, __extension__ __PRETTY_FUNCTION__)); | ||||||
3439 | if (!FreezeLoopUnswitchCond) | ||||||
3440 | return false; | ||||||
3441 | |||||||
3442 | ICFLoopSafetyInfo SafetyInfo; | ||||||
3443 | SafetyInfo.computeLoopSafetyInfo(&L); | ||||||
3444 | if (SafetyInfo.isGuaranteedToExecute(TI, &DT, &L)) | ||||||
3445 | return false; | ||||||
3446 | |||||||
3447 | Value *Cond; | ||||||
3448 | if (BranchInst *BI = dyn_cast<BranchInst>(&TI)) | ||||||
3449 | Cond = skipTrivialSelect(BI->getCondition()); | ||||||
3450 | else | ||||||
3451 | Cond = skipTrivialSelect(cast<SwitchInst>(&TI)->getCondition()); | ||||||
3452 | return !isGuaranteedNotToBeUndefOrPoison( | ||||||
3453 | Cond, &AC, L.getLoopPreheader()->getTerminator(), &DT); | ||||||
3454 | } | ||||||
3455 | |||||||
3456 | static bool unswitchBestCondition( | ||||||
3457 | Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, | ||||||
3458 | AAResults &AA, TargetTransformInfo &TTI, | ||||||
3459 | function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB, | ||||||
3460 | ScalarEvolution *SE, MemorySSAUpdater *MSSAU, | ||||||
3461 | function_ref<void(Loop &, StringRef)> DestroyLoopCB) { | ||||||
3462 | // Collect all invariant conditions within this loop (as opposed to an inner | ||||||
3463 | // loop which would be handled when visiting that inner loop). | ||||||
3464 | SmallVector<NonTrivialUnswitchCandidate, 4> UnswitchCandidates; | ||||||
3465 | IVConditionInfo PartialIVInfo; | ||||||
3466 | Instruction *PartialIVCondBranch = nullptr; | ||||||
3467 | collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo, | ||||||
3468 | PartialIVCondBranch, L, LI, AA, MSSAU); | ||||||
3469 | collectUnswitchCandidatesWithInjections(UnswitchCandidates, PartialIVInfo, | ||||||
3470 | PartialIVCondBranch, L, DT, LI, AA, | ||||||
3471 | MSSAU); | ||||||
3472 | // If we didn't find any candidates, we're done. | ||||||
3473 | if (UnswitchCandidates.empty()) | ||||||
3474 | return false; | ||||||
3475 | |||||||
3476 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << "Considering " << UnswitchCandidates.size() << " non-trivial loop invariant conditions for unswitching.\n" ; } } while (false) | ||||||
3477 | dbgs() << "Considering " << UnswitchCandidates.size()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << "Considering " << UnswitchCandidates.size() << " non-trivial loop invariant conditions for unswitching.\n" ; } } while (false) | ||||||
3478 | << " non-trivial loop invariant conditions for unswitching.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << "Considering " << UnswitchCandidates.size() << " non-trivial loop invariant conditions for unswitching.\n" ; } } while (false); | ||||||
3479 | |||||||
3480 | NonTrivialUnswitchCandidate Best = findBestNonTrivialUnswitchCandidate( | ||||||
3481 | UnswitchCandidates, L, DT, LI, AC, TTI, PartialIVInfo); | ||||||
3482 | |||||||
3483 | assert(Best.TI && "Failed to find loop unswitch candidate")(static_cast <bool> (Best.TI && "Failed to find loop unswitch candidate" ) ? void (0) : __assert_fail ("Best.TI && \"Failed to find loop unswitch candidate\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3483, __extension__ __PRETTY_FUNCTION__)); | ||||||
3484 | assert(Best.Cost && "Failed to compute cost")(static_cast <bool> (Best.Cost && "Failed to compute cost" ) ? void (0) : __assert_fail ("Best.Cost && \"Failed to compute cost\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3484, __extension__ __PRETTY_FUNCTION__)); | ||||||
3485 | |||||||
3486 | if (*Best.Cost >= UnswitchThreshold) { | ||||||
3487 | LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: " << *Best.Costdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << "Cannot unswitch, lowest cost found: " << *Best.Cost << "\n"; } } while (false) | ||||||
3488 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << "Cannot unswitch, lowest cost found: " << *Best.Cost << "\n"; } } while (false); | ||||||
3489 | return false; | ||||||
3490 | } | ||||||
3491 | |||||||
3492 | if (Best.hasPendingInjection()) | ||||||
3493 | Best = injectPendingInvariantConditions(Best, L, DT, LI, AC, MSSAU); | ||||||
3494 | assert(!Best.hasPendingInjection() &&(static_cast <bool> (!Best.hasPendingInjection() && "All injections should have been done by now!") ? void (0) : __assert_fail ("!Best.hasPendingInjection() && \"All injections should have been done by now!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3495, __extension__ __PRETTY_FUNCTION__)) | ||||||
3495 | "All injections should have been done by now!")(static_cast <bool> (!Best.hasPendingInjection() && "All injections should have been done by now!") ? void (0) : __assert_fail ("!Best.hasPendingInjection() && \"All injections should have been done by now!\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3495, __extension__ __PRETTY_FUNCTION__)); | ||||||
3496 | |||||||
3497 | if (Best.TI != PartialIVCondBranch) | ||||||
3498 | PartialIVInfo.InstToDuplicate.clear(); | ||||||
3499 | |||||||
3500 | bool InsertFreeze; | ||||||
3501 | if (auto *SI = dyn_cast<SelectInst>(Best.TI)) { | ||||||
3502 | // If the best candidate is a select, turn it into a branch. Select | ||||||
3503 | // instructions with a poison conditional do not propagate poison, but | ||||||
3504 | // branching on poison causes UB. Insert a freeze on the select | ||||||
3505 | // conditional to prevent UB after turning the select into a branch. | ||||||
3506 | InsertFreeze = !isGuaranteedNotToBeUndefOrPoison( | ||||||
3507 | SI->getCondition(), &AC, L.getLoopPreheader()->getTerminator(), &DT); | ||||||
3508 | Best.TI = turnSelectIntoBranch(SI, DT, LI, MSSAU, &AC); | ||||||
3509 | } else { | ||||||
3510 | // If the best candidate is a guard, turn it into a branch. | ||||||
3511 | if (isGuard(Best.TI)) | ||||||
3512 | Best.TI = | ||||||
3513 | turnGuardIntoBranch(cast<IntrinsicInst>(Best.TI), L, DT, LI, MSSAU); | ||||||
3514 | InsertFreeze = shouldInsertFreeze(L, *Best.TI, DT, AC); | ||||||
3515 | } | ||||||
3516 | |||||||
3517 | LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = " << Best.Costdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << " Unswitching non-trivial (cost = " << Best.Cost << ") terminator: " << *Best. TI << "\n"; } } while (false) | ||||||
3518 | << ") terminator: " << *Best.TI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << " Unswitching non-trivial (cost = " << Best.Cost << ") terminator: " << *Best. TI << "\n"; } } while (false); | ||||||
3519 | unswitchNontrivialInvariants(L, *Best.TI, Best.Invariants, PartialIVInfo, DT, | ||||||
3520 | LI, AC, UnswitchCB, SE, MSSAU, DestroyLoopCB, | ||||||
3521 | InsertFreeze); | ||||||
3522 | return true; | ||||||
3523 | } | ||||||
3524 | |||||||
3525 | /// Unswitch control flow predicated on loop invariant conditions. | ||||||
3526 | /// | ||||||
3527 | /// This first hoists all branches or switches which are trivial (IE, do not | ||||||
3528 | /// require duplicating any part of the loop) out of the loop body. It then | ||||||
3529 | /// looks at other loop invariant control flows and tries to unswitch those as | ||||||
3530 | /// well by cloning the loop if the result is small enough. | ||||||
3531 | /// | ||||||
3532 | /// The `DT`, `LI`, `AC`, `AA`, `TTI` parameters are required analyses that are | ||||||
3533 | /// also updated based on the unswitch. The `MSSA` analysis is also updated if | ||||||
3534 | /// valid (i.e. its use is enabled). | ||||||
3535 | /// | ||||||
3536 | /// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is | ||||||
3537 | /// true, we will attempt to do non-trivial unswitching as well as trivial | ||||||
3538 | /// unswitching. | ||||||
3539 | /// | ||||||
3540 | /// The `UnswitchCB` callback provided will be run after unswitching is | ||||||
3541 | /// complete, with the first parameter set to `true` if the provided loop | ||||||
3542 | /// remains a loop, and a list of new sibling loops created. | ||||||
3543 | /// | ||||||
3544 | /// If `SE` is non-null, we will update that analysis based on the unswitching | ||||||
3545 | /// done. | ||||||
///
/// Trivial unswitching is attempted only when `Trivial` is true. `NonTrivial`
/// enables non-trivial unswitching only on targets for which
/// `TTI.hasBranchDivergence()` is false. `PSI` and `BFI` are consulted by the
/// cold-loop-nest check, and `DestroyLoopCB` is forwarded to
/// unswitchBestCondition.
///
/// Returns true if trivial unswitching or unswitchBestCondition succeeded,
/// and false otherwise (including the early exits for loops that are not in
/// loop-simplify form, optsize functions, skipped cold loops and failed
/// legality checks).
3546 | static bool | ||||||
3547 | unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, | ||||||
3548 | AAResults &AA, TargetTransformInfo &TTI, bool Trivial, | ||||||
3549 | bool NonTrivial, | ||||||
3550 | function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB, | ||||||
3551 | ScalarEvolution *SE, MemorySSAUpdater *MSSAU, | ||||||
3552 | ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, | ||||||
3553 | function_ref<void(Loop &, StringRef)> DestroyLoopCB) { | ||||||
3554 | assert(L.isRecursivelyLCSSAForm(DT, LI) &&(static_cast <bool> (L.isRecursivelyLCSSAForm(DT, LI) && "Loops must be in LCSSA form before unswitching.") ? void (0 ) : __assert_fail ("L.isRecursivelyLCSSAForm(DT, LI) && \"Loops must be in LCSSA form before unswitching.\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3555, __extension__ __PRETTY_FUNCTION__)) | ||||||
3555 | "Loops must be in LCSSA form before unswitching.")(static_cast <bool> (L.isRecursivelyLCSSAForm(DT, LI) && "Loops must be in LCSSA form before unswitching.") ? void (0 ) : __assert_fail ("L.isRecursivelyLCSSAForm(DT, LI) && \"Loops must be in LCSSA form before unswitching.\"" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3555, __extension__ __PRETTY_FUNCTION__)); | ||||||
3556 | |||||||
3557 | // Must be in loop simplified form: we need a preheader and dedicated exits. | ||||||
3558 | if (!L.isLoopSimplifyForm()) | ||||||
3559 | return false; | ||||||
3560 | |||||||
3561 | // Try trivial unswitch first before loop over other basic blocks in the loop. | ||||||
3562 | if (Trivial && unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) { | ||||||
3563 | // If we unswitched successfully we will want to clean up the loop before | ||||||
3564 | // processing it further so just mark it as unswitched and return. | ||||||
3565 | UnswitchCB(/*CurrentLoopValid*/ true, false, {}); | ||||||
3566 | return true; | ||||||
3567 | } | ||||||
3568 | |||||||
3569 | // Check whether we should continue with non-trivial conditions. | ||||||
3570 | // EnableNonTrivialUnswitch: Global variable that forces non-trivial | ||||||
3571 | // unswitching for testing and debugging. | ||||||
3572 | // NonTrivial: Parameter that enables non-trivial unswitching for this | ||||||
3573 | // invocation of the transform. But this should be allowed only | ||||||
3574 | // for targets without branch divergence. | ||||||
3575 | // | ||||||
3576 | // FIXME: If divergence analysis becomes available to a loop | ||||||
3577 | // transform, we should allow unswitching for non-trivial uniform | ||||||
3578 | // branches even on targets that have divergence. | ||||||
3579 | // https://bugs.llvm.org/show_bug.cgi?id=48819 | ||||||
3580 | bool ContinueWithNonTrivial = | ||||||
3581 | EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence()); | ||||||
// NOTE(review): the next line appears truncated in this listing (the closing
// ')' of the condition is not visible) - confirm against the original file.
3582 | if (!ContinueWithNonTrivial
| ||||||
3583 | return false; | ||||||
3584 | |||||||
3585 | // Skip non-trivial unswitching for optsize functions. | ||||||
3586 | if (L.getHeader()->getParent()->hasOptSize()) | ||||||
3587 | return false; | ||||||
3588 | |||||||
3589 | // Returns true if Loop L's loop nest is cold, i.e. if the headers of L, | ||||||
3590 | // of the loops L is nested in, and of the loops nested in L are all cold. | ||||||
// The lambda dereferences PSI unconditionally, so it must only be invoked
// when PSI is non-null.
3591 | auto IsLoopNestCold = [&](const Loop *L) { | ||||||
3592 | // Check L and all of its parent loops. | ||||||
3593 | auto *Parent = L; | ||||||
3594 | while (Parent) { | ||||||
3595 | if (!PSI->isColdBlock(Parent->getHeader(), BFI)) | ||||||
3596 | return false; | ||||||
3597 | Parent = Parent->getParentLoop(); | ||||||
3598 | } | ||||||
3599 | // Next check all loops nested within L. | ||||||
// The worklist is seeded with the direct sub-loops and grows by the sub-loops
// of every loop popped from it, so loops at any nesting depth are visited.
3600 | SmallVector<const Loop *, 4> Worklist; | ||||||
3601 | Worklist.insert(Worklist.end(), L->getSubLoops().begin(), | ||||||
3602 | L->getSubLoops().end()); | ||||||
3603 | while (!Worklist.empty()) { | ||||||
3604 | auto *CurLoop = Worklist.pop_back_val(); | ||||||
3605 | if (!PSI->isColdBlock(CurLoop->getHeader(), BFI)) | ||||||
3606 | return false; | ||||||
3607 | Worklist.insert(Worklist.end(), CurLoop->getSubLoops().begin(), | ||||||
3608 | CurLoop->getSubLoops().end()); | ||||||
3609 | } | ||||||
3610 | return true; | ||||||
3611 | }; | ||||||
3612 | |||||||
3613 | // Skip cold loops in cold loop nests, as unswitching them brings little | ||||||
3614 | // benefit but increases the code size | ||||||
// NOTE(review): the condition below is cut off after `PSI` in this listing.
// Presumably it goes on to call IsLoopNestCold(&L), which is otherwise unused
// here - confirm the full condition against the original file.
3615 | if (PSI
| ||||||
3616 | LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << " Skip cold loop: " << L << "\n"; } } while (false); | ||||||
3617 | return false; | ||||||
3618 | } | ||||||
3619 | |||||||
3620 | // Perform legality checks. | ||||||
3621 | if (!isSafeForNoNTrivialUnswitching(L, LI)) | ||||||
3622 | return false; | ||||||
3623 | |||||||
3624 | // For non-trivial unswitching, because it often creates new loops, we rely on | ||||||
3625 | // the pass manager to iterate on the loops rather than trying to immediately | ||||||
3626 | // reach a fixed point. There is no substantial advantage to iterating | ||||||
3627 | // internally, and if any of the new loops are simplified enough to contain | ||||||
3628 | // trivial unswitching we want to prefer those. | ||||||
3629 | |||||||
3630 | // Try to unswitch the best invariant condition. We prefer this full unswitch to | ||||||
3631 | // a partial unswitch when possible below the threshold. | ||||||
3632 | if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, UnswitchCB, SE, MSSAU, | ||||||
3633 | DestroyLoopCB)) | ||||||
3634 | return true; | ||||||
3635 | |||||||
3636 | // No other opportunities to unswitch. | ||||||
3637 | return false; | ||||||
3638 | } | ||||||
3639 | |||||||
3640 | PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM, | ||||||
3641 | LoopStandardAnalysisResults &AR, | ||||||
3642 | LPMUpdater &U) { | ||||||
3643 | Function &F = *L.getHeader()->getParent(); | ||||||
3644 | (void)F; | ||||||
3645 | ProfileSummaryInfo *PSI = nullptr; | ||||||
3646 | if (auto OuterProxy
| ||||||
3647 | AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR) | ||||||
3648 | .getCachedResult<ModuleAnalysisManagerFunctionProxy>(F)) | ||||||
3649 | PSI = OuterProxy->getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); | ||||||
3650 | LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << Ldo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << "Unswitching loop in " << F.getName() << ": " << L << "\n"; } } while (false) | ||||||
| |||||||
3651 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << "Unswitching loop in " << F.getName() << ": " << L << "\n"; } } while (false); | ||||||
3652 | |||||||
3653 | // Save the current loop name in a variable so that we can report it even | ||||||
3654 | // after it has been deleted. | ||||||
3655 | std::string LoopName = std::string(L.getName()); | ||||||
3656 | |||||||
3657 | auto UnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid, | ||||||
3658 | bool PartiallyInvariant, | ||||||
3659 | ArrayRef<Loop *> NewLoops) { | ||||||
3660 | // If we did a non-trivial unswitch, we have added new (cloned) loops. | ||||||
3661 | if (!NewLoops.empty()) | ||||||
3662 | U.addSiblingLoops(NewLoops); | ||||||
3663 | |||||||
3664 | // If the current loop remains valid, we should revisit it to catch any | ||||||
3665 | // other unswitch opportunities. Otherwise, we need to mark it as deleted. | ||||||
3666 | if (CurrentLoopValid) { | ||||||
3667 | if (PartiallyInvariant) { | ||||||
3668 | // Mark the new loop as partially unswitched, to avoid unswitching on | ||||||
3669 | // the same condition again. | ||||||
3670 | auto &Context = L.getHeader()->getContext(); | ||||||
3671 | MDNode *DisableUnswitchMD = MDNode::get( | ||||||
3672 | Context, | ||||||
3673 | MDString::get(Context, "llvm.loop.unswitch.partial.disable")); | ||||||
3674 | MDNode *NewLoopID = makePostTransformationMetadata( | ||||||
3675 | Context, L.getLoopID(), {"llvm.loop.unswitch.partial"}, | ||||||
3676 | {DisableUnswitchMD}); | ||||||
3677 | L.setLoopID(NewLoopID); | ||||||
3678 | } else | ||||||
3679 | U.revisitCurrentLoop(); | ||||||
3680 | } else | ||||||
3681 | U.markLoopAsDeleted(L, LoopName); | ||||||
3682 | }; | ||||||
3683 | |||||||
3684 | auto DestroyLoopCB = [&U](Loop &L, StringRef Name) { | ||||||
3685 | U.markLoopAsDeleted(L, Name); | ||||||
3686 | }; | ||||||
3687 | |||||||
3688 | std::optional<MemorySSAUpdater> MSSAU; | ||||||
3689 | if (AR.MSSA) { | ||||||
3690 | MSSAU = MemorySSAUpdater(AR.MSSA); | ||||||
3691 | if (VerifyMemorySSA) | ||||||
3692 | AR.MSSA->verifyMemorySSA(); | ||||||
3693 | } | ||||||
3694 | if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial, | ||||||
3695 | UnswitchCB, &AR.SE, MSSAU ? &*MSSAU : nullptr, PSI, AR.BFI, | ||||||
3696 | DestroyLoopCB)) | ||||||
3697 | return PreservedAnalyses::all(); | ||||||
3698 | |||||||
3699 | if (AR.MSSA && VerifyMemorySSA) | ||||||
3700 | AR.MSSA->verifyMemorySSA(); | ||||||
3701 | |||||||
3702 | // Historically this pass has had issues with the dominator tree so verify it | ||||||
3703 | // in asserts builds. | ||||||
3704 | assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast))(static_cast <bool> (AR.DT.verify(DominatorTree::VerificationLevel ::Fast)) ? void (0) : __assert_fail ("AR.DT.verify(DominatorTree::VerificationLevel::Fast)" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3704, __extension__ __PRETTY_FUNCTION__)); | ||||||
3705 | |||||||
3706 | auto PA = getLoopPassPreservedAnalyses(); | ||||||
3707 | if (AR.MSSA) | ||||||
3708 | PA.preserve<MemorySSAAnalysis>(); | ||||||
3709 | return PA; | ||||||
3710 | } | ||||||
3711 | |||||||
3712 | void SimpleLoopUnswitchPass::printPipeline( | ||||||
3713 | raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { | ||||||
3714 | static_cast<PassInfoMixin<SimpleLoopUnswitchPass> *>(this)->printPipeline( | ||||||
3715 | OS, MapClassName2PassName); | ||||||
3716 | |||||||
3717 | OS << '<'; | ||||||
3718 | OS << (NonTrivial ? "" : "no-") << "nontrivial;"; | ||||||
3719 | OS << (Trivial ? "" : "no-") << "trivial"; | ||||||
3720 | OS << '>'; | ||||||
3721 | } | ||||||
3722 | |||||||
3723 | namespace { | ||||||
3724 | |||||||
3725 | class SimpleLoopUnswitchLegacyPass : public LoopPass { | ||||||
3726 | bool NonTrivial; | ||||||
3727 | |||||||
3728 | public: | ||||||
3729 | static char ID; // Pass ID, replacement for typeid | ||||||
3730 | |||||||
3731 | explicit SimpleLoopUnswitchLegacyPass(bool NonTrivial = false) | ||||||
3732 | : LoopPass(ID), NonTrivial(NonTrivial) { | ||||||
3733 | initializeSimpleLoopUnswitchLegacyPassPass( | ||||||
3734 | *PassRegistry::getPassRegistry()); | ||||||
3735 | } | ||||||
3736 | |||||||
3737 | bool runOnLoop(Loop *L, LPPassManager &LPM) override; | ||||||
3738 | |||||||
3739 | void getAnalysisUsage(AnalysisUsage &AU) const override { | ||||||
3740 | AU.addRequired<AssumptionCacheTracker>(); | ||||||
3741 | AU.addRequired<TargetTransformInfoWrapperPass>(); | ||||||
3742 | AU.addRequired<MemorySSAWrapperPass>(); | ||||||
3743 | AU.addPreserved<MemorySSAWrapperPass>(); | ||||||
3744 | getLoopAnalysisUsage(AU); | ||||||
3745 | } | ||||||
3746 | }; | ||||||
3747 | |||||||
3748 | } // end anonymous namespace | ||||||
3749 | |||||||
3750 | bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) { | ||||||
3751 | if (skipLoop(L)) | ||||||
3752 | return false; | ||||||
3753 | |||||||
3754 | Function &F = *L->getHeader()->getParent(); | ||||||
3755 | |||||||
3756 | LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *Ldo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << "Unswitching loop in " << F.getName() << ": " << *L << "\n" ; } } while (false) | ||||||
3757 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("simple-loop-unswitch")) { dbgs() << "Unswitching loop in " << F.getName() << ": " << *L << "\n" ; } } while (false); | ||||||
3758 | auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); | ||||||
3759 | auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); | ||||||
3760 | auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); | ||||||
3761 | auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); | ||||||
3762 | auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); | ||||||
3763 | MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA(); | ||||||
3764 | MemorySSAUpdater MSSAU(MSSA); | ||||||
3765 | |||||||
3766 | auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); | ||||||
3767 | auto *SE = SEWP ? &SEWP->getSE() : nullptr; | ||||||
3768 | |||||||
3769 | auto UnswitchCB = [&L, &LPM](bool CurrentLoopValid, bool PartiallyInvariant, | ||||||
3770 | ArrayRef<Loop *> NewLoops) { | ||||||
3771 | // If we did a non-trivial unswitch, we have added new (cloned) loops. | ||||||
3772 | for (auto *NewL : NewLoops) | ||||||
3773 | LPM.addLoop(*NewL); | ||||||
3774 | |||||||
3775 | // If the current loop remains valid, re-add it to the queue. This is | ||||||
3776 | // a little wasteful as we'll finish processing the current loop as well, | ||||||
3777 | // but it is the best we can do in the old PM. | ||||||
3778 | if (CurrentLoopValid) { | ||||||
3779 | // If the current loop has been unswitched using a partially invariant | ||||||
3780 | // condition, we should not re-add the current loop to avoid unswitching | ||||||
3781 | // on the same condition again. | ||||||
3782 | if (!PartiallyInvariant) | ||||||
3783 | LPM.addLoop(*L); | ||||||
3784 | } else | ||||||
3785 | LPM.markLoopAsDeleted(*L); | ||||||
3786 | }; | ||||||
3787 | |||||||
3788 | auto DestroyLoopCB = [&LPM](Loop &L, StringRef /* Name */) { | ||||||
3789 | LPM.markLoopAsDeleted(L); | ||||||
3790 | }; | ||||||
3791 | |||||||
3792 | if (VerifyMemorySSA) | ||||||
3793 | MSSA->verifyMemorySSA(); | ||||||
3794 | bool Changed = | ||||||
3795 | unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial, UnswitchCB, SE, | ||||||
3796 | &MSSAU, nullptr, nullptr, DestroyLoopCB); | ||||||
3797 | |||||||
3798 | if (VerifyMemorySSA) | ||||||
3799 | MSSA->verifyMemorySSA(); | ||||||
3800 | |||||||
3801 | // Historically this pass has had issues with the dominator tree so verify it | ||||||
3802 | // in asserts builds. | ||||||
3803 | assert(DT.verify(DominatorTree::VerificationLevel::Fast))(static_cast <bool> (DT.verify(DominatorTree::VerificationLevel ::Fast)) ? void (0) : __assert_fail ("DT.verify(DominatorTree::VerificationLevel::Fast)" , "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3803, __extension__ __PRETTY_FUNCTION__)); | ||||||
3804 | |||||||
3805 | return Changed; | ||||||
3806 | } | ||||||
3807 | |||||||
3808 | char SimpleLoopUnswitchLegacyPass::ID = 0; | ||||||
3809 | INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",static void *initializeSimpleLoopUnswitchLegacyPassPassOnce(PassRegistry &Registry) { | ||||||
3810 | "Simple unswitch loops", false, false)static void *initializeSimpleLoopUnswitchLegacyPassPassOnce(PassRegistry &Registry) { | ||||||
3811 | INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)initializeAssumptionCacheTrackerPass(Registry); | ||||||
3812 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry); | ||||||
3813 | INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)initializeLoopInfoWrapperPassPass(Registry); | ||||||
3814 | INITIALIZE_PASS_DEPENDENCY(LoopPass)initializeLoopPassPass(Registry); | ||||||
3815 | INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)initializeMemorySSAWrapperPassPass(Registry); | ||||||
3816 | INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)initializeTargetTransformInfoWrapperPassPass(Registry); | ||||||
3817 | INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",PassInfo *PI = new PassInfo( "Simple unswitch loops", "simple-loop-unswitch" , &SimpleLoopUnswitchLegacyPass::ID, PassInfo::NormalCtor_t (callDefaultCtor<SimpleLoopUnswitchLegacyPass>), false, false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeSimpleLoopUnswitchLegacyPassPassFlag ; void llvm::initializeSimpleLoopUnswitchLegacyPassPass(PassRegistry &Registry) { llvm::call_once(InitializeSimpleLoopUnswitchLegacyPassPassFlag , initializeSimpleLoopUnswitchLegacyPassPassOnce, std::ref(Registry )); } | ||||||
3818 | "Simple unswitch loops", false, false)PassInfo *PI = new PassInfo( "Simple unswitch loops", "simple-loop-unswitch" , &SimpleLoopUnswitchLegacyPass::ID, PassInfo::NormalCtor_t (callDefaultCtor<SimpleLoopUnswitchLegacyPass>), false, false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeSimpleLoopUnswitchLegacyPassPassFlag ; void llvm::initializeSimpleLoopUnswitchLegacyPassPass(PassRegistry &Registry) { llvm::call_once(InitializeSimpleLoopUnswitchLegacyPassPassFlag , initializeSimpleLoopUnswitchLegacyPassPassOnce, std::ref(Registry )); } | ||||||
3819 | |||||||
3820 | Pass *llvm::createSimpleLoopUnswitchLegacyPass(bool NonTrivial) { | ||||||
3821 | return new SimpleLoopUnswitchLegacyPass(NonTrivial); | ||||||
3822 | } |
1 | // Components for manipulating sequences of characters -*- C++ -*- |
2 | |
3 | // Copyright (C) 1997-2020 Free Software Foundation, Inc. |
4 | // |
5 | // This file is part of the GNU ISO C++ Library. This library is free |
6 | // software; you can redistribute it and/or modify it under the |
7 | // terms of the GNU General Public License as published by the |
8 | // Free Software Foundation; either version 3, or (at your option) |
9 | // any later version. |
10 | |
11 | // This library is distributed in the hope that it will be useful, |
12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | // GNU General Public License for more details. |
15 | |
16 | // Under Section 7 of GPL version 3, you are granted additional |
17 | // permissions described in the GCC Runtime Library Exception, version |
18 | // 3.1, as published by the Free Software Foundation. |
19 | |
20 | // You should have received a copy of the GNU General Public License and |
21 | // a copy of the GCC Runtime Library Exception along with this program; |
22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
23 | // <http://www.gnu.org/licenses/>. |
24 | |
25 | /** @file bits/basic_string.h |
26 | * This is an internal header file, included by other library headers. |
27 | * Do not attempt to use it directly. @headername{string} |
28 | */ |
29 | |
30 | // |
31 | // ISO C++ 14882: 21 Strings library |
32 | // |
33 | |
34 | #ifndef _BASIC_STRING_H1 |
35 | #define _BASIC_STRING_H1 1 |
36 | |
37 | #pragma GCC system_header |
38 | |
39 | #include <ext/atomicity.h> |
40 | #include <ext/alloc_traits.h> |
41 | #include <debug/debug.h> |
42 | |
43 | #if __cplusplus201703L >= 201103L |
44 | #include <initializer_list> |
45 | #endif |
46 | |
47 | #if __cplusplus201703L >= 201703L |
48 | # include <string_view> |
49 | #endif |
50 | |
51 | |
52 | namespace std _GLIBCXX_VISIBILITY(default)__attribute__ ((__visibility__ ("default"))) |
53 | { |
54 | _GLIBCXX_BEGIN_NAMESPACE_VERSION |
55 | |
56 | #if _GLIBCXX_USE_CXX11_ABI1 |
57 | _GLIBCXX_BEGIN_NAMESPACE_CXX11namespace __cxx11 { |
58 | /** |
59 | * @class basic_string basic_string.h <string> |
60 | * @brief Managing sequences of characters and character-like objects. |
61 | * |
62 | * @ingroup strings |
63 | * @ingroup sequences |
64 | * |
65 | * @tparam _CharT Type of character |
66 | * @tparam _Traits Traits for character type, defaults to |
67 | * char_traits<_CharT>. |
68 | * @tparam _Alloc Allocator type, defaults to allocator<_CharT>. |
69 | * |
70 | * Meets the requirements of a <a href="tables.html#65">container</a>, a |
71 | * <a href="tables.html#66">reversible container</a>, and a |
72 | * <a href="tables.html#67">sequence</a>. Of the |
73 | * <a href="tables.html#68">optional sequence requirements</a>, only |
74 | * @c push_back, @c at, and @c %array access are supported. |
75 | */ |
76 | template<typename _CharT, typename _Traits, typename _Alloc> |
77 | class basic_string |
78 | { |
79 | typedef typename __gnu_cxx::__alloc_traits<_Alloc>::template |
80 | rebind<_CharT>::other _Char_alloc_type; |
81 | typedef __gnu_cxx::__alloc_traits<_Char_alloc_type> _Alloc_traits; |
82 | |
83 | // Types: |
84 | public: |
85 | typedef _Traits traits_type; |
86 | typedef typename _Traits::char_type value_type; |
87 | typedef _Char_alloc_type allocator_type; |
88 | typedef typename _Alloc_traits::size_type size_type; |
89 | typedef typename _Alloc_traits::difference_type difference_type; |
90 | typedef typename _Alloc_traits::reference reference; |
91 | typedef typename _Alloc_traits::const_reference const_reference; |
92 | typedef typename _Alloc_traits::pointer pointer; |
93 | typedef typename _Alloc_traits::const_pointer const_pointer; |
94 | typedef __gnu_cxx::__normal_iterator<pointer, basic_string> iterator; |
95 | typedef __gnu_cxx::__normal_iterator<const_pointer, basic_string> |
96 | const_iterator; |
97 | typedef std::reverse_iterator<const_iterator> const_reverse_iterator; |
98 | typedef std::reverse_iterator<iterator> reverse_iterator; |
99 | |
100 | /// Value returned by various member functions when they fail. |
101 | static const size_type npos = static_cast<size_type>(-1); |
102 | |
103 | protected: |
104 | // type used for positions in insert, erase etc. |
105 | #if __cplusplus201703L < 201103L |
106 | typedef iterator __const_iterator; |
107 | #else |
108 | typedef const_iterator __const_iterator; |
109 | #endif |
110 | |
111 | private: |
112 | #if __cplusplus201703L >= 201703L |
113 | // A helper type for avoiding boiler-plate. |
114 | typedef basic_string_view<_CharT, _Traits> __sv_type; |
115 | |
116 | template<typename _Tp, typename _Res> |
117 | using _If_sv = enable_if_t< |
118 | __and_<is_convertible<const _Tp&, __sv_type>, |
119 | __not_<is_convertible<const _Tp*, const basic_string*>>, |
120 | __not_<is_convertible<const _Tp&, const _CharT*>>>::value, |
121 | _Res>; |
122 | |
123 | // Allows an implicit conversion to __sv_type. |
124 | static __sv_type |
125 | _S_to_string_view(__sv_type __svt) noexcept |
126 | { return __svt; } |
127 | |
128 | // Wraps a string_view by explicit conversion and thus |
129 | // allows to add an internal constructor that does not |
130 | // participate in overload resolution when a string_view |
131 | // is provided. |
132 | struct __sv_wrapper |
133 | { |
134 | explicit __sv_wrapper(__sv_type __sv) noexcept : _M_sv(__sv) { } |
135 | __sv_type _M_sv; |
136 | }; |
137 | |
138 | /** |
139 | * @brief Only internally used: Construct string from a string view |
140 | * wrapper. |
141 | * @param __svw string view wrapper. |
142 | * @param __a Allocator to use. |
143 | */ |
144 | explicit |
145 | basic_string(__sv_wrapper __svw, const _Alloc& __a) |
146 | : basic_string(__svw._M_sv.data(), __svw._M_sv.size(), __a) { } |
147 | #endif |
148 | |
149 | // Use empty-base optimization: http://www.cantrip.org/emptyopt.html |
150 | struct _Alloc_hider : allocator_type // TODO check __is_final |
151 | { |
152 | #if __cplusplus201703L < 201103L |
153 | _Alloc_hider(pointer __dat, const _Alloc& __a = _Alloc()) |
154 | : allocator_type(__a), _M_p(__dat) { } |
155 | #else |
156 | _Alloc_hider(pointer __dat, const _Alloc& __a) |
157 | : allocator_type(__a), _M_p(__dat) { } |
158 | |
159 | _Alloc_hider(pointer __dat, _Alloc&& __a = _Alloc()) |
160 | : allocator_type(std::move(__a)), _M_p(__dat) { } |
161 | #endif |
162 | |
163 | pointer _M_p; // The actual data. |
164 | }; |
165 | |
166 | _Alloc_hider _M_dataplus; |
167 | size_type _M_string_length; |
168 | |
169 | enum { _S_local_capacity = 15 / sizeof(_CharT) }; |
170 | |
171 | union |
172 | { |
173 | _CharT _M_local_buf[_S_local_capacity + 1]; |
174 | size_type _M_allocated_capacity; |
175 | }; |
176 | |
177 | void |
178 | _M_data(pointer __p) |
179 | { _M_dataplus._M_p = __p; } |
180 | |
181 | void |
182 | _M_length(size_type __length) |
183 | { _M_string_length = __length; } |
184 | |
185 | pointer |
186 | _M_data() const |
187 | { return _M_dataplus._M_p; } |
188 | |
189 | pointer |
190 | _M_local_data() |
191 | { |
192 | #if __cplusplus201703L >= 201103L |
193 | return std::pointer_traits<pointer>::pointer_to(*_M_local_buf); |
194 | #else |
195 | return pointer(_M_local_buf); |
196 | #endif |
197 | } |
198 | |
199 | const_pointer |
200 | _M_local_data() const |
201 | { |
202 | #if __cplusplus201703L >= 201103L |
203 | return std::pointer_traits<const_pointer>::pointer_to(*_M_local_buf); |
204 | #else |
205 | return const_pointer(_M_local_buf); |
206 | #endif |
207 | } |
208 | |
209 | void |
210 | _M_capacity(size_type __capacity) |
211 | { _M_allocated_capacity = __capacity; } |
212 | |
213 | void |
214 | _M_set_length(size_type __n) |
215 | { |
216 | _M_length(__n); |
217 | traits_type::assign(_M_data()[__n], _CharT()); |
218 | } |
219 | |
220 | bool |
221 | _M_is_local() const |
222 | { return _M_data() == _M_local_data(); } |
223 | |
224 | // Create & Destroy |
225 | pointer |
226 | _M_create(size_type&, size_type); |
227 | |
228 | void |
229 | _M_dispose() |
230 | { |
231 | if (!_M_is_local()) |
232 | _M_destroy(_M_allocated_capacity); |
233 | } |
234 | |
235 | void |
236 | _M_destroy(size_type __size) throw() |
237 | { _Alloc_traits::deallocate(_M_get_allocator(), _M_data(), __size + 1); } |
238 | |
239 | // _M_construct_aux is used to implement the 21.3.1 para 15 which |
240 | // requires special behaviour if _InIterator is an integral type |
241 | template<typename _InIterator> |
242 | void |
243 | _M_construct_aux(_InIterator __beg, _InIterator __end, |
244 | std::__false_type) |
245 | { |
246 | typedef typename iterator_traits<_InIterator>::iterator_category _Tag; |
247 | _M_construct(__beg, __end, _Tag()); |
248 | } |
249 | |
250 | // _GLIBCXX_RESOLVE_LIB_DEFECTS |
251 | // 438. Ambiguity in the "do the right thing" clause |
252 | template<typename _Integer> |
253 | void |
254 | _M_construct_aux(_Integer __beg, _Integer __end, std::__true_type) |
255 | { _M_construct_aux_2(static_cast<size_type>(__beg), __end); } |
256 | |
257 | void |
258 | _M_construct_aux_2(size_type __req, _CharT __c) |
259 | { _M_construct(__req, __c); } |
260 | |
261 | template<typename _InIterator> |
262 | void |
263 | _M_construct(_InIterator __beg, _InIterator __end) |
264 | { |
265 | typedef typename std::__is_integer<_InIterator>::__type _Integral; |
266 | _M_construct_aux(__beg, __end, _Integral()); |
267 | } |
268 | |
269 | // For Input Iterators, used in istreambuf_iterators, etc. |
270 | template<typename _InIterator> |
271 | void |
272 | _M_construct(_InIterator __beg, _InIterator __end, |
273 | std::input_iterator_tag); |
274 | |
275 | // For forward_iterators up to random_access_iterators, used for |
276 | // string::iterator, _CharT*, etc. |
277 | template<typename _FwdIterator> |
278 | void |
279 | _M_construct(_FwdIterator __beg, _FwdIterator __end, |
280 | std::forward_iterator_tag); |
281 | |
282 | void |
283 | _M_construct(size_type __req, _CharT __c); |
284 | |
285 | allocator_type& |
286 | _M_get_allocator() |
287 | { return _M_dataplus; } |
288 | |
289 | const allocator_type& |
290 | _M_get_allocator() const |
291 | { return _M_dataplus; } |
292 | |
293 | private: |
294 | |
295 | #ifdef _GLIBCXX_DISAMBIGUATE_REPLACE_INST |
296 | // The explicit instantiations in misc-inst.cc require this due to |
297 | // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64063 |
298 | template<typename _Tp, bool _Requires = |
299 | !__are_same<_Tp, _CharT*>::__value |
300 | && !__are_same<_Tp, const _CharT*>::__value |
301 | && !__are_same<_Tp, iterator>::__value |
302 | && !__are_same<_Tp, const_iterator>::__value> |
303 | struct __enable_if_not_native_iterator |
304 | { typedef basic_string& __type; }; |
305 | template<typename _Tp> |
306 | struct __enable_if_not_native_iterator<_Tp, false> { }; |
307 | #endif |
308 | |
309 | size_type |
310 | _M_check(size_type __pos, const char* __s) const |
311 | { |
312 | if (__pos > this->size()) |
313 | __throw_out_of_range_fmt(__N("%s: __pos (which is %zu) > "("%s: __pos (which is %zu) > " "this->size() (which is %zu)" ) |
314 | "this->size() (which is %zu)")("%s: __pos (which is %zu) > " "this->size() (which is %zu)" ), |
315 | __s, __pos, this->size()); |
316 | return __pos; |
317 | } |
318 | |
319 | void |
320 | _M_check_length(size_type __n1, size_type __n2, const char* __s) const |
321 | { |
322 | if (this->max_size() - (this->size() - __n1) < __n2) |
323 | __throw_length_error(__N(__s)(__s)); |
324 | } |
325 | |
326 | |
327 | // NB: _M_limit doesn't check for a bad __pos value. |
328 | size_type |
329 | _M_limit(size_type __pos, size_type __off) const _GLIBCXX_NOEXCEPTnoexcept |
330 | { |
331 | const bool __testoff = __off < this->size() - __pos; |
332 | return __testoff ? __off : this->size() - __pos; |
333 | } |
334 | |
335 | // True if _Rep and source do not overlap. |
336 | bool |
337 | _M_disjunct(const _CharT* __s) const _GLIBCXX_NOEXCEPTnoexcept |
338 | { |
339 | return (less<const _CharT*>()(__s, _M_data()) |
340 | || less<const _CharT*>()(_M_data() + this->size(), __s)); |
341 | } |
342 | |
343 | // When __n = 1 way faster than the general multichar |
344 | // traits_type::copy/move/assign. |
345 | static void |
346 | _S_copy(_CharT* __d, const _CharT* __s, size_type __n) |
347 | { |
348 | if (__n == 1) |
349 | traits_type::assign(*__d, *__s); |
350 | else |
351 | traits_type::copy(__d, __s, __n); |
352 | } |
353 | |
354 | static void |
355 | _S_move(_CharT* __d, const _CharT* __s, size_type __n) |
356 | { |
357 | if (__n == 1) |
358 | traits_type::assign(*__d, *__s); |
359 | else |
360 | traits_type::move(__d, __s, __n); |
361 | } |
362 | |
363 | static void |
364 | _S_assign(_CharT* __d, size_type __n, _CharT __c) |
365 | { |
366 | if (__n == 1) |
367 | traits_type::assign(*__d, __c); |
368 | else |
369 | traits_type::assign(__d, __n, __c); |
370 | } |
371 | |
372 | // _S_copy_chars is a separate template to permit specialization |
373 | // to optimize for the common case of pointers as iterators. |
374 | template<class _Iterator> |
375 | static void |
376 | _S_copy_chars(_CharT* __p, _Iterator __k1, _Iterator __k2) |
377 | { |
378 | for (; __k1 != __k2; ++__k1, (void)++__p) |
379 | traits_type::assign(*__p, *__k1); // These types are off. |
380 | } |
381 | |
382 | static void |
383 | _S_copy_chars(_CharT* __p, iterator __k1, iterator __k2) _GLIBCXX_NOEXCEPTnoexcept |
384 | { _S_copy_chars(__p, __k1.base(), __k2.base()); } |
385 | |
386 | static void |
387 | _S_copy_chars(_CharT* __p, const_iterator __k1, const_iterator __k2) |
388 | _GLIBCXX_NOEXCEPTnoexcept |
389 | { _S_copy_chars(__p, __k1.base(), __k2.base()); } |
390 | |
391 | static void |
392 | _S_copy_chars(_CharT* __p, _CharT* __k1, _CharT* __k2) _GLIBCXX_NOEXCEPTnoexcept |
393 | { _S_copy(__p, __k1, __k2 - __k1); } |
394 | |
395 | static void |
396 | _S_copy_chars(_CharT* __p, const _CharT* __k1, const _CharT* __k2) |
397 | _GLIBCXX_NOEXCEPTnoexcept |
398 | { _S_copy(__p, __k1, __k2 - __k1); } |
399 | |
400 | static int |
401 | _S_compare(size_type __n1, size_type __n2) _GLIBCXX_NOEXCEPTnoexcept |
402 | { |
403 | const difference_type __d = difference_type(__n1 - __n2); |
404 | |
405 | if (__d > __gnu_cxx::__numeric_traits<int>::__max) |
406 | return __gnu_cxx::__numeric_traits<int>::__max; |
407 | else if (__d < __gnu_cxx::__numeric_traits<int>::__min) |
408 | return __gnu_cxx::__numeric_traits<int>::__min; |
409 | else |
410 | return int(__d); |
411 | } |
412 | |
413 | void |
414 | _M_assign(const basic_string&); |
415 | |
416 | void |
417 | _M_mutate(size_type __pos, size_type __len1, const _CharT* __s, |
418 | size_type __len2); |
419 | |
420 | void |
421 | _M_erase(size_type __pos, size_type __n); |
422 | |
423 | public: |
424 | // Construct/copy/destroy: |
425 | // NB: We overload ctors in some cases instead of using default |
426 | // arguments, per 17.4.4.4 para. 2 item 2. |
427 | |
428 | /** |
429 | * @brief Default constructor creates an empty string. |
430 | */ |
431 | basic_string() |
432 | _GLIBCXX_NOEXCEPT_IF(is_nothrow_default_constructible<_Alloc>::value)noexcept(is_nothrow_default_constructible<_Alloc>::value ) |
433 | : _M_dataplus(_M_local_data()) |
434 | { _M_set_length(0); } |
435 | |
436 | /** |
437 | * @brief Construct an empty string using allocator @a a. |
438 | */ |
439 | explicit |
440 | basic_string(const _Alloc& __a) _GLIBCXX_NOEXCEPTnoexcept |
441 | : _M_dataplus(_M_local_data(), __a) |
442 | { _M_set_length(0); } |
443 | |
444 | /** |
445 | * @brief Construct string with copy of value of @a __str. |
446 | * @param __str Source string. |
447 | */ |
448 | basic_string(const basic_string& __str) |
449 | : _M_dataplus(_M_local_data(), |
450 | _Alloc_traits::_S_select_on_copy(__str._M_get_allocator())) |
451 | { _M_construct(__str._M_data(), __str._M_data() + __str.length()); } |
452 | |
453 | // _GLIBCXX_RESOLVE_LIB_DEFECTS |
454 | // 2583. no way to supply an allocator for basic_string(str, pos) |
455 | /** |
456 | * @brief Construct string as copy of a substring. |
457 | * @param __str Source string. |
458 | * @param __pos Index of first character to copy from. |
459 | * @param __a Allocator to use. |
460 | */ |
461 | basic_string(const basic_string& __str, size_type __pos, |
462 | const _Alloc& __a = _Alloc()) |
463 | : _M_dataplus(_M_local_data(), __a) |
464 | { |
465 | const _CharT* __start = __str._M_data() |
466 | + __str._M_check(__pos, "basic_string::basic_string"); |
467 | _M_construct(__start, __start + __str._M_limit(__pos, npos)); |
468 | } |
469 | |
470 | /** |
471 | * @brief Construct string as copy of a substring. |
472 | * @param __str Source string. |
473 | * @param __pos Index of first character to copy from. |
474 | * @param __n Number of characters to copy. |
475 | */ |
476 | basic_string(const basic_string& __str, size_type __pos, |
477 | size_type __n) |
478 | : _M_dataplus(_M_local_data()) |
479 | { |
480 | const _CharT* __start = __str._M_data() |
481 | + __str._M_check(__pos, "basic_string::basic_string"); |
482 | _M_construct(__start, __start + __str._M_limit(__pos, __n)); |
483 | } |
484 | |
485 | /** |
486 | * @brief Construct string as copy of a substring. |
487 | * @param __str Source string. |
488 | * @param __pos Index of first character to copy from. |
489 | * @param __n Number of characters to copy. |
490 | * @param __a Allocator to use. |
491 | */ |
492 | basic_string(const basic_string& __str, size_type __pos, |
493 | size_type __n, const _Alloc& __a) |
494 | : _M_dataplus(_M_local_data(), __a) |
495 | { |
496 | const _CharT* __start |
497 | = __str._M_data() + __str._M_check(__pos, "string::string"); |
498 | _M_construct(__start, __start + __str._M_limit(__pos, __n)); |
499 | } |
500 | |
501 | /** |
502 | * @brief Construct string initialized by a character %array. |
503 | * @param __s Source character %array. |
504 | * @param __n Number of characters to copy. |
505 | * @param __a Allocator to use (default is default allocator). |
506 | * |
507 | * NB: @a __s must have at least @a __n characters, '\\0' |
508 | * has no special meaning. |
509 | */ |
510 | basic_string(const _CharT* __s, size_type __n, |
511 | const _Alloc& __a = _Alloc()) |
512 | : _M_dataplus(_M_local_data(), __a) |
513 | { _M_construct(__s, __s + __n); } |
514 | |
515 | /** |
516 | * @brief Construct string as copy of a C string. |
517 | * @param __s Source C string. |
518 | * @param __a Allocator to use (default is default allocator). |
519 | */ |
520 | #if __cpp_deduction_guides201703L && ! defined _GLIBCXX_DEFINING_STRING_INSTANTIATIONS |
521 | // _GLIBCXX_RESOLVE_LIB_DEFECTS |
522 | // 3076. basic_string CTAD ambiguity |
523 | template<typename = _RequireAllocator<_Alloc>> |
524 | #endif |
525 | basic_string(const _CharT* __s, const _Alloc& __a = _Alloc()) |
526 | : _M_dataplus(_M_local_data(), __a) |
527 | { _M_construct(__s, __s ? __s + traits_type::length(__s) : __s+npos); } |
528 | |
529 | /** |
530 | * @brief Construct string as multiple characters. |
531 | * @param __n Number of characters. |
532 | * @param __c Character to use. |
533 | * @param __a Allocator to use (default is default allocator). |
534 | */ |
535 | #if __cpp_deduction_guides201703L && ! defined _GLIBCXX_DEFINING_STRING_INSTANTIATIONS |
536 | // _GLIBCXX_RESOLVE_LIB_DEFECTS |
537 | // 3076. basic_string CTAD ambiguity |
538 | template<typename = _RequireAllocator<_Alloc>> |
539 | #endif |
540 | basic_string(size_type __n, _CharT __c, const _Alloc& __a = _Alloc()) |
541 | : _M_dataplus(_M_local_data(), __a) |
542 | { _M_construct(__n, __c); } |
543 | |
544 | #if __cplusplus201703L >= 201103L |
545 | /** |
546 | * @brief Move construct string. |
547 | * @param __str Source string. |
548 | * |
549 | * The newly-created string contains the exact contents of @a __str. |
550 | * @a __str is a valid, but unspecified string. |
551 | **/ |
552 | basic_string(basic_string&& __str) noexcept |
553 | : _M_dataplus(_M_local_data(), std::move(__str._M_get_allocator())) |
554 | { |
555 | if (__str._M_is_local()) |
556 | { |
557 | traits_type::copy(_M_local_buf, __str._M_local_buf, |
558 | _S_local_capacity + 1); |
559 | } |
560 | else |
561 | { |
562 | _M_data(__str._M_data()); |
563 | _M_capacity(__str._M_allocated_capacity); |
564 | } |
565 | |
566 | // Must use _M_length() here not _M_set_length() because |
567 | // basic_stringbuf relies on writing into unallocated capacity so |
568 | // we mess up the contents if we put a '\0' in the string. |
569 | _M_length(__str.length()); |
570 | __str._M_data(__str._M_local_data()); |
571 | __str._M_set_length(0); |
572 | } |
573 | |
574 | /** |
575 | * @brief Construct string from an initializer %list. |
576 | * @param __l std::initializer_list of characters. |
577 | * @param __a Allocator to use (default is default allocator). |
578 | */ |
579 | basic_string(initializer_list<_CharT> __l, const _Alloc& __a = _Alloc()) |
580 | : _M_dataplus(_M_local_data(), __a) |
581 | { _M_construct(__l.begin(), __l.end()); } |
582 | |
583 | basic_string(const basic_string& __str, const _Alloc& __a) |
584 | : _M_dataplus(_M_local_data(), __a) |
585 | { _M_construct(__str.begin(), __str.end()); } |
586 | |
587 | basic_string(basic_string&& __str, const _Alloc& __a) |
588 | noexcept(_Alloc_traits::_S_always_equal()) |
589 | : _M_dataplus(_M_local_data(), __a) |
590 | { |
591 | if (__str._M_is_local()) |
592 | { |
593 | traits_type::copy(_M_local_buf, __str._M_local_buf, |
594 | _S_local_capacity + 1); |
595 | _M_length(__str.length()); |
596 | __str._M_set_length(0); |
597 | } |
598 | else if (_Alloc_traits::_S_always_equal() |
599 | || __str.get_allocator() == __a) |
600 | { |
601 | _M_data(__str._M_data()); |
602 | _M_length(__str.length()); |
603 | _M_capacity(__str._M_allocated_capacity); |
604 | __str._M_data(__str._M_local_buf); |
605 | __str._M_set_length(0); |
606 | } |
607 | else |
608 | _M_construct(__str.begin(), __str.end()); |
609 | } |
610 | |
611 | #endif // C++11 |
612 | |
613 | /** |
614 | * @brief Construct string as copy of a range. |
615 | * @param __beg Start of range. |
616 | * @param __end End of range. |
617 | * @param __a Allocator to use (default is default allocator). |
618 | */ |
619 | #if __cplusplus201703L >= 201103L |
620 | template<typename _InputIterator, |
621 | typename = std::_RequireInputIter<_InputIterator>> |
622 | #else |
623 | template<typename _InputIterator> |
624 | #endif |
625 | basic_string(_InputIterator __beg, _InputIterator __end, |
626 | const _Alloc& __a = _Alloc()) |
627 | : _M_dataplus(_M_local_data(), __a) |
628 | { _M_construct(__beg, __end); } |
629 | |
630 | #if __cplusplus201703L >= 201703L |
631 | /** |
632 | * @brief Construct string from a substring of a string_view. |
633 | * @param __t Source object convertible to string view. |
634 | * @param __pos The index of the first character to copy from __t. |
635 | * @param __n The number of characters to copy from __t. |
636 | * @param __a Allocator to use. |
637 | */ |
638 | template<typename _Tp, typename = _If_sv<_Tp, void>> |
639 | basic_string(const _Tp& __t, size_type __pos, size_type __n, |
640 | const _Alloc& __a = _Alloc()) |
641 | : basic_string(_S_to_string_view(__t).substr(__pos, __n), __a) { } |
642 | |
643 | /** |
644 | * @brief Construct string from a string_view. |
645 | * @param __t Source object convertible to string view. |
646 | * @param __a Allocator to use (default is default allocator). |
647 | */ |
648 | template<typename _Tp, typename = _If_sv<_Tp, void>> |
649 | explicit |
650 | basic_string(const _Tp& __t, const _Alloc& __a = _Alloc()) |
651 | : basic_string(__sv_wrapper(_S_to_string_view(__t)), __a) { } |
652 | #endif // C++17 |
653 | |
654 | /** |
655 | * @brief Destroy the string instance. |
656 | */ |
657 | ~basic_string() |
658 | { _M_dispose(); } |
659 | |
660 | /** |
661 | * @brief Assign the value of @a str to this string. |
662 | * @param __str Source string. |
663 | */ |
664 | basic_string& |
665 | operator=(const basic_string& __str) |
666 | { |
667 | return this->assign(__str); |
668 | } |
669 | |
670 | /** |
671 | * @brief Copy contents of @a s into this string. |
672 | * @param __s Source null-terminated string. |
673 | */ |
674 | basic_string& |
675 | operator=(const _CharT* __s) |
676 | { return this->assign(__s); } |
677 | |
678 | /** |
679 | * @brief Set value to string of length 1. |
680 | * @param __c Source character. |
681 | * |
682 | * Assigning to a character makes this string length 1 and |
683 | * (*this)[0] == @a c. |
684 | */ |
685 | basic_string& |
686 | operator=(_CharT __c) |
687 | { |
688 | this->assign(1, __c); |
689 | return *this; |
690 | } |
691 | |
692 | #if __cplusplus201703L >= 201103L |
693 | /** |
694 | * @brief Move assign the value of @a str to this string. |
695 | * @param __str Source string. |
696 | * |
697 | * The contents of @a str are moved into this string (without copying). |
698 | * @a str is a valid, but unspecified string. |
699 | **/ |
700 | // _GLIBCXX_RESOLVE_LIB_DEFECTS |
701 | // 2063. Contradictory requirements for string move assignment |
702 | basic_string& |
703 | operator=(basic_string&& __str) |
704 | noexcept(_Alloc_traits::_S_nothrow_move()) |
705 | { |
706 | if (!_M_is_local() && _Alloc_traits::_S_propagate_on_move_assign() |
707 | && !_Alloc_traits::_S_always_equal() |
708 | && _M_get_allocator() != __str._M_get_allocator()) |
709 | { |
710 | // Destroy existing storage before replacing allocator. |
711 | _M_destroy(_M_allocated_capacity); |
712 | _M_data(_M_local_data()); |
713 | _M_set_length(0); |
714 | } |
715 | // Replace allocator if POCMA is true. |
716 | std::__alloc_on_move(_M_get_allocator(), __str._M_get_allocator()); |
717 | |
718 | if (__str._M_is_local()) |
719 | { |
720 | // We've always got room for a short string, just copy it. |
721 | if (__str.size()) |
722 | this->_S_copy(_M_data(), __str._M_data(), __str.size()); |
723 | _M_set_length(__str.size()); |
724 | } |
725 | else if (_Alloc_traits::_S_propagate_on_move_assign() |
726 | || _Alloc_traits::_S_always_equal() |
727 | || _M_get_allocator() == __str._M_get_allocator()) |
728 | { |
729 | // Just move the allocated pointer, our allocator can free it. |
730 | pointer __data = nullptr; |
731 | size_type __capacity; |
732 | if (!_M_is_local()) |
733 | { |
734 | if (_Alloc_traits::_S_always_equal()) |
735 | { |
736 | // __str can reuse our existing storage. |
737 | __data = _M_data(); |
738 | __capacity = _M_allocated_capacity; |
739 | } |
740 | else // __str can't use it, so free it. |
741 | _M_destroy(_M_allocated_capacity); |
742 | } |
743 | |
744 | _M_data(__str._M_data()); |
745 | _M_length(__str.length()); |
746 | _M_capacity(__str._M_allocated_capacity); |
747 | if (__data) |
748 | { |
749 | __str._M_data(__data); |
750 | __str._M_capacity(__capacity); |
751 | } |
752 | else |
753 | __str._M_data(__str._M_local_buf); |
754 | } |
755 | else // Need to do a deep copy |
756 | assign(__str); |
757 | __str.clear(); |
758 | return *this; |
759 | } |
760 | |
761 | /** |
762 | * @brief Set value to string constructed from initializer %list. |
763 | * @param __l std::initializer_list. |
764 | */ |
765 | basic_string& |
766 | operator=(initializer_list<_CharT> __l) |
767 | { |
768 | this->assign(__l.begin(), __l.size()); |
769 | return *this; |
770 | } |
771 | #endif // C++11 |
772 | |
773 | #if __cplusplus201703L >= 201703L |
774 | /** |
775 | * @brief Set value to string constructed from a string_view. |
776 | * @param __svt An object convertible to string_view. |
777 | */ |
778 | template<typename _Tp> |
779 | _If_sv<_Tp, basic_string&> |
780 | operator=(const _Tp& __svt) |
781 | { return this->assign(__svt); } |
782 | |
783 | /** |
784 | * @brief Convert to a string_view. |
785 | * @return A string_view. |
786 | */ |
787 | operator __sv_type() const noexcept |
788 | { return __sv_type(data(), size()); } |
789 | #endif // C++17 |
790 | |
791 | // Iterators: |
792 | /** |
793 | * Returns a read/write iterator that points to the first character in |
794 | * the %string. |
795 | */ |
796 | iterator |
797 | begin() _GLIBCXX_NOEXCEPTnoexcept |
798 | { return iterator(_M_data()); } |
799 | |
800 | /** |
801 | * Returns a read-only (constant) iterator that points to the first |
802 | * character in the %string. |
803 | */ |
804 | const_iterator |
805 | begin() const _GLIBCXX_NOEXCEPTnoexcept |
806 | { return const_iterator(_M_data()); } |
807 | |
808 | /** |
809 | * Returns a read/write iterator that points one past the last |
810 | * character in the %string. |
811 | */ |
812 | iterator |
813 | end() _GLIBCXX_NOEXCEPTnoexcept |
814 | { return iterator(_M_data() + this->size()); } |
815 | |
816 | /** |
817 | * Returns a read-only (constant) iterator that points one past the |
818 | * last character in the %string. |
819 | */ |
820 | const_iterator |
821 | end() const _GLIBCXX_NOEXCEPTnoexcept |
822 | { return const_iterator(_M_data() + this->size()); } |
823 | |
824 | /** |
825 | * Returns a read/write reverse iterator that points to the last |
826 | * character in the %string. Iteration is done in reverse element |
827 | * order. |
828 | */ |
829 | reverse_iterator |
830 | rbegin() _GLIBCXX_NOEXCEPTnoexcept |
831 | { return reverse_iterator(this->end()); } |
832 | |
833 | /** |
834 | * Returns a read-only (constant) reverse iterator that points |
835 | * to the last character in the %string. Iteration is done in |
836 | * reverse element order. |
837 | */ |
838 | const_reverse_iterator |
839 | rbegin() const _GLIBCXX_NOEXCEPTnoexcept |
840 | { return const_reverse_iterator(this->end()); } |
841 | |
842 | /** |
843 | * Returns a read/write reverse iterator that points to one before the |
844 | * first character in the %string. Iteration is done in reverse |
845 | * element order. |
846 | */ |
847 | reverse_iterator |
848 | rend() _GLIBCXX_NOEXCEPTnoexcept |
849 | { return reverse_iterator(this->begin()); } |
850 | |
851 | /** |
852 | * Returns a read-only (constant) reverse iterator that points |
853 | * to one before the first character in the %string. Iteration |
854 | * is done in reverse element order. |
855 | */ |
856 | const_reverse_iterator |
857 | rend() const _GLIBCXX_NOEXCEPTnoexcept |
858 | { return const_reverse_iterator(this->begin()); } |
859 | |
860 | #if __cplusplus201703L >= 201103L |
861 | /** |
862 | * Returns a read-only (constant) iterator that points to the first |
863 | * character in the %string. |
864 | */ |
865 | const_iterator |
866 | cbegin() const noexcept |
867 | { return const_iterator(this->_M_data()); } |
868 | |
869 | /** |
870 | * Returns a read-only (constant) iterator that points one past the |
871 | * last character in the %string. |
872 | */ |
873 | const_iterator |
874 | cend() const noexcept |
875 | { return const_iterator(this->_M_data() + this->size()); } |
876 | |
877 | /** |
878 | * Returns a read-only (constant) reverse iterator that points |
879 | * to the last character in the %string. Iteration is done in |
880 | * reverse element order. |
881 | */ |
882 | const_reverse_iterator |
883 | crbegin() const noexcept |
884 | { return const_reverse_iterator(this->end()); } |
885 | |
886 | /** |
887 | * Returns a read-only (constant) reverse iterator that points |
888 | * to one before the first character in the %string. Iteration |
889 | * is done in reverse element order. |
890 | */ |
891 | const_reverse_iterator |
892 | crend() const noexcept |
893 | { return const_reverse_iterator(this->begin()); } |
894 | #endif |
895 | |
896 | public: |
897 | // Capacity: |
898 | /// Returns the number of characters in the string, not including any |
899 | /// null-termination. |
900 | size_type |
901 | size() const _GLIBCXX_NOEXCEPTnoexcept |
902 | { return _M_string_length; } |
903 | |
904 | /// Returns the number of characters in the string, not including any |
905 | /// null-termination. |
906 | size_type |
907 | length() const _GLIBCXX_NOEXCEPTnoexcept |
908 | { return _M_string_length; } |
909 | |
910 | /// Returns the size() of the largest possible %string. |
911 | size_type |
912 | max_size() const _GLIBCXX_NOEXCEPTnoexcept |
913 | { return (_Alloc_traits::max_size(_M_get_allocator()) - 1) / 2; } |
914 | |
915 | /** |
916 | * @brief Resizes the %string to the specified number of characters. |
917 | * @param __n Number of characters the %string should contain. |
918 | * @param __c Character to fill any new elements. |
919 | * |
920 | * This function will %resize the %string to the specified |
921 | * number of characters. If the number is smaller than the |
922 | * %string's current size the %string is truncated, otherwise |
923 | * the %string is extended and new elements are %set to @a __c. |
924 | */ |
925 | void |
926 | resize(size_type __n, _CharT __c); |
927 | |
928 | /** |
929 | * @brief Resizes the %string to the specified number of characters. |
930 | * @param __n Number of characters the %string should contain. |
931 | * |
932 | * This function will resize the %string to the specified length. If |
933 | * the new size is smaller than the %string's current size the %string |
934 | * is truncated, otherwise the %string is extended and new characters |
935 | * are default-constructed. For basic types such as char, this means |
936 | * setting them to 0. |
937 | */ |
938 | void |
939 | resize(size_type __n) |
940 | { this->resize(__n, _CharT()); } |
941 | |
942 | #if __cplusplus201703L >= 201103L |
943 | /// A non-binding request to reduce capacity() to size(). |
944 | void |
945 | shrink_to_fit() noexcept |
946 | { |
947 | #if __cpp_exceptions |
948 | if (capacity() > size()) |
949 | { |
950 | try |
951 | { reserve(0); } |
952 | catch(...) |
953 | { } |
954 | } |
955 | #endif |
956 | } |
957 | #endif |
958 | |
959 | /** |
960 | * Returns the total number of characters that the %string can hold |
961 | * before needing to allocate more memory. |
962 | */ |
963 | size_type |
964 | capacity() const _GLIBCXX_NOEXCEPTnoexcept |
965 | { |
966 | return _M_is_local() ? size_type(_S_local_capacity) |
967 | : _M_allocated_capacity; |
968 | } |
969 | |
970 | /** |
971 | * @brief Attempt to preallocate enough memory for specified number of |
972 | * characters. |
973 | * @param __res_arg Number of characters required. |
974 | * @throw std::length_error If @a __res_arg exceeds @c max_size(). |
975 | * |
976 | * This function attempts to reserve enough memory for the |
977 | * %string to hold the specified number of characters. If the |
978 | * number requested is more than max_size(), length_error is |
979 | * thrown. |
980 | * |
981 | * The advantage of this function is that if optimal code is a |
982 | * necessity and the user can determine the string length that will be |
983 | * required, the user can reserve the memory in %advance, and thus |
984 | * prevent a possible reallocation of memory and copying of %string |
985 | * data. |
986 | */ |
987 | void |
988 | reserve(size_type __res_arg = 0); |
989 | |
990 | /** |
991 | * Erases the string, making it empty. |
992 | */ |
993 | void |
994 | clear() _GLIBCXX_NOEXCEPTnoexcept |
995 | { _M_set_length(0); } |
996 | |
997 | /** |
998 | * Returns true if the %string is empty. Equivalent to |
999 | * <code>*this == ""</code>. |
1000 | */ |
1001 | _GLIBCXX_NODISCARD[[__nodiscard__]] bool |
1002 | empty() const _GLIBCXX_NOEXCEPTnoexcept |
1003 | { return this->size() == 0; } |
1004 | |
1005 | // Element access: |
1006 | /** |
1007 | * @brief Subscript access to the data contained in the %string. |
1008 | * @param __pos The index of the character to access. |
1009 | * @return Read-only (constant) reference to the character. |
1010 | * |
1011 | * This operator allows for easy, array-style, data access. |
1012 | * Note that data access with this operator is unchecked and |
1013 | * out_of_range lookups are not defined. (For checked lookups |
1014 | * see at().) |
1015 | */ |
1016 | const_reference |
1017 | operator[] (size_type __pos) const _GLIBCXX_NOEXCEPTnoexcept |
1018 | { |
1019 | __glibcxx_assert(__pos <= size())do { if (! (__pos <= size())) std::__replacement_assert("/usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/bits/basic_string.h" , 1019, __PRETTY_FUNCTION__, "__pos <= size()"); } while ( false); |
1020 | return _M_data()[__pos]; |
1021 | } |
1022 | |
1023 | /** |
1024 | * @brief Subscript access to the data contained in the %string. |
1025 | * @param __pos The index of the character to access. |
1026 | * @return Read/write reference to the character. |
1027 | * |
1028 | * This operator allows for easy, array-style, data access. |
1029 | * Note that data access with this operator is unchecked and |
1030 | * out_of_range lookups are not defined. (For checked lookups |
1031 | * see at().) |
1032 | */ |
1033 | reference |
1034 | operator[](size_type __pos) |
1035 | { |
1036 | // Allow pos == size() both in C++98 mode, as v3 extension, |
1037 | // and in C++11 mode. |
1038 | __glibcxx_assert(__pos <= size())do { if (! (__pos <= size())) std::__replacement_assert("/usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/bits/basic_string.h" , 1038, __PRETTY_FUNCTION__, "__pos <= size()"); } while ( false); |
1039 | // In pedantic mode be strict in C++98 mode. |
1040 | _GLIBCXX_DEBUG_PEDASSERT(__cplusplus >= 201103L || __pos < size()); |
1041 | return _M_data()[__pos]; |
1042 | } |
1043 | |
1044 | /** |
1045 | * @brief Provides access to the data contained in the %string. |
1046 | * @param __n The index of the character to access. |
1047 | * @return Read-only (const) reference to the character. |
1048 | * @throw std::out_of_range If @a n is an invalid index. |
1049 | * |
1050 | * This function provides for safer data access. The parameter is |
1051 | * first checked that it is in the range of the string. The function |
1052 | * throws out_of_range if the check fails. |
1053 | */ |
1054 | const_reference |
1055 | at(size_type __n) const |
1056 | { |
1057 | if (__n >= this->size()) |
1058 | __throw_out_of_range_fmt(__N("basic_string::at: __n "("basic_string::at: __n " "(which is %zu) >= this->size() " "(which is %zu)") |
1059 | "(which is %zu) >= this->size() "("basic_string::at: __n " "(which is %zu) >= this->size() " "(which is %zu)") |
1060 | "(which is %zu)")("basic_string::at: __n " "(which is %zu) >= this->size() " "(which is %zu)"), |
1061 | __n, this->size()); |
1062 | return _M_data()[__n]; |
1063 | } |
1064 | |
1065 | /** |
1066 | * @brief Provides access to the data contained in the %string. |
1067 | * @param __n The index of the character to access. |
1068 | * @return Read/write reference to the character. |
1069 | * @throw std::out_of_range If @a n is an invalid index. |
1070 | * |
1071 | * This function provides for safer data access. The parameter is |
1072 | * first checked that it is in the range of the string. The function |
1073 | * throws out_of_range if the check fails. |
1074 | */ |
1075 | reference |
1076 | at(size_type __n) |
1077 | { |
1078 | if (__n >= size()) |
1079 | __throw_out_of_range_fmt(__N("basic_string::at: __n "("basic_string::at: __n " "(which is %zu) >= this->size() " "(which is %zu)") |
1080 | "(which is %zu) >= this->size() "("basic_string::at: __n " "(which is %zu) >= this->size() " "(which is %zu)") |
1081 | "(which is %zu)")("basic_string::at: __n " "(which is %zu) >= this->size() " "(which is %zu)"), |
1082 | __n, this->size()); |
1083 | return _M_data()[__n]; |
1084 | } |
1085 | |
1086 | #if __cplusplus201703L >= 201103L |
1087 | /** |
1088 | * Returns a read/write reference to the data at the first |
1089 | * element of the %string. |
1090 | */ |
1091 | reference |
1092 | front() noexcept |
1093 | { |
1094 | __glibcxx_assert(!empty())do { if (! (!empty())) std::__replacement_assert("/usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/bits/basic_string.h" , 1094, __PRETTY_FUNCTION__, "!empty()"); } while (false); |
1095 | return operator[](0); |
1096 | } |
1097 | |
1098 | /** |
1099 | * Returns a read-only (constant) reference to the data at the first |
1100 | * element of the %string. |
1101 | */ |
1102 | const_reference |
1103 | front() const noexcept |
1104 | { |
1105 | __glibcxx_assert(!empty())do { if (! (!empty())) std::__replacement_assert("/usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/bits/basic_string.h" , 1105, __PRETTY_FUNCTION__, "!empty()"); } while (false); |
1106 | return operator[](0); |
1107 | } |
1108 | |
1109 | /** |
1110 | * Returns a read/write reference to the data at the last |
1111 | * element of the %string. |
1112 | */ |
1113 | reference |
1114 | back() noexcept |
1115 | { |
1116 | __glibcxx_assert(!empty())do { if (! (!empty())) std::__replacement_assert("/usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/bits/basic_string.h" , 1116, __PRETTY_FUNCTION__, "!empty()"); } while (false); |
1117 | return operator[](this->size() - 1); |
1118 | } |
1119 | |
1120 | /** |
1121 | * Returns a read-only (constant) reference to the data at the |
1122 | * last element of the %string. |
1123 | */ |
1124 | const_reference |
1125 | back() const noexcept |
1126 | { |
1127 | __glibcxx_assert(!empty())do { if (! (!empty())) std::__replacement_assert("/usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/bits/basic_string.h" , 1127, __PRETTY_FUNCTION__, "!empty()"); } while (false); |
1128 | return operator[](this->size() - 1); |
1129 | } |
1130 | #endif |
1131 | |
1132 | // Modifiers: |
1133 | /** |
1134 | * @brief Append a string to this string. |
1135 | * @param __str The string to append. |
1136 | * @return Reference to this string. |
1137 | */ |
1138 | basic_string& |
1139 | operator+=(const basic_string& __str) |
1140 | { return this->append(__str); } |
1141 | |
1142 | /** |
1143 | * @brief Append a C string. |
1144 | * @param __s The C string to append. |
1145 | * @return Reference to this string. |
1146 | */ |
1147 | basic_string& |
1148 | operator+=(const _CharT* __s) |
1149 | { return this->append(__s); } |
1150 | |
1151 | /** |
1152 | * @brief Append a character. |
1153 | * @param __c The character to append. |
1154 | * @return Reference to this string. |
1155 | */ |
1156 | basic_string& |
1157 | operator+=(_CharT __c) |
1158 | { |
1159 | this->push_back(__c); |
1160 | return *this; |
1161 | } |
1162 | |
1163 | #if __cplusplus201703L >= 201103L |
1164 | /** |
1165 | * @brief Append an initializer_list of characters. |
1166 | * @param __l The initializer_list of characters to be appended. |
1167 | * @return Reference to this string. |
1168 | */ |
1169 | basic_string& |
1170 | operator+=(initializer_list<_CharT> __l) |
1171 | { return this->append(__l.begin(), __l.size()); } |
1172 | #endif // C++11 |
1173 | |
1174 | #if __cplusplus201703L >= 201703L |
1175 | /** |
1176 | * @brief Append a string_view. |
1177 | * @param __svt An object convertible to string_view to be appended. |
1178 | * @return Reference to this string. |
1179 | */ |
1180 | template<typename _Tp> |
1181 | _If_sv<_Tp, basic_string&> |
1182 | operator+=(const _Tp& __svt) |
1183 | { return this->append(__svt); } |
1184 | #endif // C++17 |
1185 | |
1186 | /** |
1187 | * @brief Append a string to this string. |
1188 | * @param __str The string to append. |
1189 | * @return Reference to this string. |
1190 | */ |
1191 | basic_string& |
1192 | append(const basic_string& __str) |
1193 | { return _M_append(__str._M_data(), __str.size()); } |
1194 | |
1195 | /** |
1196 | * @brief Append a substring. |
1197 | * @param __str The string to append. |
1198 | * @param __pos Index of the first character of str to append. |
1199 | * @param __n The number of characters to append. |
1200 | * @return Reference to this string. |
1201 | * @throw std::out_of_range if @a __pos is not a valid index. |
1202 | * |
1203 | * This function appends @a __n characters from @a __str |
1204 | * starting at @a __pos to this string. If @a __n is is larger |
1205 | * than the number of available characters in @a __str, the |
1206 | * remainder of @a __str is appended. |
1207 | */ |
1208 | basic_string& |
1209 | append(const basic_string& __str, size_type __pos, size_type __n = npos) |
1210 | { return _M_append(__str._M_data() |
1211 | + __str._M_check(__pos, "basic_string::append"), |
1212 | __str._M_limit(__pos, __n)); } |
1213 | |
1214 | /** |
1215 | * @brief Append a C substring. |
1216 | * @param __s The C string to append. |
1217 | * @param __n The number of characters to append. |
1218 | * @return Reference to this string. |
1219 | */ |
1220 | basic_string& |
1221 | append(const _CharT* __s, size_type __n) |
1222 | { |
1223 | __glibcxx_requires_string_len(__s, __n); |
1224 | _M_check_length(size_type(0), __n, "basic_string::append"); |
1225 | return _M_append(__s, __n); |
1226 | } |
1227 | |
1228 | /** |
1229 | * @brief Append a C string. |
1230 | * @param __s The C string to append. |
1231 | * @return Reference to this string. |
1232 | */ |
1233 | basic_string& |
1234 | append(const _CharT* __s) |
1235 | { |
1236 | __glibcxx_requires_string(__s); |
1237 | const size_type __n = traits_type::length(__s); |
1238 | _M_check_length(size_type(0), __n, "basic_string::append"); |
1239 | return _M_append(__s, __n); |
1240 | } |
1241 | |
1242 | /** |
1243 | * @brief Append multiple characters. |
1244 | * @param __n The number of characters to append. |
1245 | * @param __c The character to use. |
1246 | * @return Reference to this string. |
1247 | * |
1248 | * Appends __n copies of __c to this string. |
1249 | */ |
1250 | basic_string& |
1251 | append(size_type __n, _CharT __c) |
1252 | { return _M_replace_aux(this->size(), size_type(0), __n, __c); } |
1253 | |
1254 | #if __cplusplus201703L >= 201103L |
1255 | /** |
1256 | * @brief Append an initializer_list of characters. |
1257 | * @param __l The initializer_list of characters to append. |
1258 | * @return Reference to this string. |
1259 | */ |
1260 | basic_string& |
1261 | append(initializer_list<_CharT> __l) |
1262 | { return this->append(__l.begin(), __l.size()); } |
1263 | #endif // C++11 |
1264 | |
1265 | /** |
1266 | * @brief Append a range of characters. |
1267 | * @param __first Iterator referencing the first character to append. |
1268 | * @param __last Iterator marking the end of the range. |
1269 | * @return Reference to this string. |
1270 | * |
1271 | * Appends characters in the range [__first,__last) to this string. |
1272 | */ |
1273 | #if __cplusplus201703L >= 201103L |
1274 | template<class _InputIterator, |
1275 | typename = std::_RequireInputIter<_InputIterator>> |
1276 | #else |
1277 | template<class _InputIterator> |
1278 | #endif |
1279 | basic_string& |
1280 | append(_InputIterator __first, _InputIterator __last) |
1281 | { return this->replace(end(), end(), __first, __last); } |
1282 | |
1283 | #if __cplusplus201703L >= 201703L |
1284 | /** |
1285 | * @brief Append a string_view. |
1286 | * @param __svt An object convertible to string_view to be appended. |
1287 | * @return Reference to this string. |
1288 | */ |
1289 | template<typename _Tp> |
1290 | _If_sv<_Tp, basic_string&> |
1291 | append(const _Tp& __svt) |
1292 | { |
1293 | __sv_type __sv = __svt; |
1294 | return this->append(__sv.data(), __sv.size()); |
1295 | } |
1296 | |
1297 | /** |
1298 | * @brief Append a range of characters from a string_view. |
1299 | * @param __svt An object convertible to string_view to be appended from. |
1300 | * @param __pos The position in the string_view to append from. |
1301 | * @param __n The number of characters to append from the string_view. |
1302 | * @return Reference to this string. |
1303 | */ |
1304 | template<typename _Tp> |
1305 | _If_sv<_Tp, basic_string&> |
1306 | append(const _Tp& __svt, size_type __pos, size_type __n = npos) |
1307 | { |
1308 | __sv_type __sv = __svt; |
1309 | return _M_append(__sv.data() |
1310 | + std::__sv_check(__sv.size(), __pos, "basic_string::append"), |
1311 | std::__sv_limit(__sv.size(), __pos, __n)); |
1312 | } |
1313 | #endif // C++17 |
1314 | |
1315 | /** |
1316 | * @brief Append a single character. |
1317 | * @param __c Character to append. |
1318 | */ |
1319 | void |
1320 | push_back(_CharT __c) |
1321 | { |
1322 | const size_type __size = this->size(); |
1323 | if (__size + 1 > this->capacity()) |
1324 | this->_M_mutate(__size, size_type(0), 0, size_type(1)); |
1325 | traits_type::assign(this->_M_data()[__size], __c); |
1326 | this->_M_set_length(__size + 1); |
1327 | } |
1328 | |
1329 | /** |
1330 | * @brief Set value to contents of another string. |
1331 | * @param __str Source string to use. |
1332 | * @return Reference to this string. |
1333 | */ |
1334 | basic_string& |
1335 | assign(const basic_string& __str) |
1336 | { |
1337 | #if __cplusplus201703L >= 201103L |
1338 | if (_Alloc_traits::_S_propagate_on_copy_assign()) |
1339 | { |
1340 | if (!_Alloc_traits::_S_always_equal() && !_M_is_local() |
1341 | && _M_get_allocator() != __str._M_get_allocator()) |
1342 | { |
1343 | // Propagating allocator cannot free existing storage so must |
1344 | // deallocate it before replacing current allocator. |
1345 | if (__str.size() <= _S_local_capacity) |
1346 | { |
1347 | _M_destroy(_M_allocated_capacity); |
1348 | _M_data(_M_local_data()); |
1349 | _M_set_length(0); |
1350 | } |
1351 | else |
1352 | { |
1353 | const auto __len = __str.size(); |
1354 | auto __alloc = __str._M_get_allocator(); |
1355 | // If this allocation throws there are no effects: |
1356 | auto __ptr = _Alloc_traits::allocate(__alloc, __len + 1); |
1357 | _M_destroy(_M_allocated_capacity); |
1358 | _M_data(__ptr); |
1359 | _M_capacity(__len); |
1360 | _M_set_length(__len); |
1361 | } |
1362 | } |
1363 | std::__alloc_on_copy(_M_get_allocator(), __str._M_get_allocator()); |
1364 | } |
1365 | #endif |
1366 | this->_M_assign(__str); |
1367 | return *this; |
1368 | } |
1369 | |
1370 | #if __cplusplus201703L >= 201103L |
1371 | /** |
1372 | * @brief Set value to contents of another string. |
1373 | * @param __str Source string to use. |
1374 | * @return Reference to this string. |
1375 | * |
1376 | * This function sets this string to the exact contents of @a __str. |
1377 | * @a __str is a valid, but unspecified string. |
1378 | */ |
1379 | basic_string& |
1380 | assign(basic_string&& __str) |
1381 | noexcept(_Alloc_traits::_S_nothrow_move()) |
1382 | { |
1383 | // _GLIBCXX_RESOLVE_LIB_DEFECTS |
1384 | // 2063. Contradictory requirements for string move assignment |
1385 | return *this = std::move(__str); |
1386 | } |
1387 | #endif // C++11 |
1388 | |
1389 | /** |
1390 | * @brief Set value to a substring of a string. |
1391 | * @param __str The string to use. |
1392 | * @param __pos Index of the first character of str. |
1393 | * @param __n Number of characters to use. |
1394 | * @return Reference to this string. |
1395 | * @throw std::out_of_range if @a pos is not a valid index. |
1396 | * |
1397 | * This function sets this string to the substring of @a __str |
1398 | * consisting of @a __n characters at @a __pos. If @a __n is |
1399 | * is larger than the number of available characters in @a |
1400 | * __str, the remainder of @a __str is used. |
1401 | */ |
1402 | basic_string& |
1403 | assign(const basic_string& __str, size_type __pos, size_type __n = npos) |
1404 | { return _M_replace(size_type(0), this->size(), __str._M_data() |
1405 | + __str._M_check(__pos, "basic_string::assign"), |
1406 | __str._M_limit(__pos, __n)); } |
1407 | |
1408 |