Bug Summary

File: build/source/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
Warning: line 3361, column 21
Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name SimpleLoopUnswitch.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm -resource-dir /usr/lib/llvm-17/lib/clang/17 -I lib/Transforms/Scalar -I /build/source/llvm/lib/Transforms/Scalar -I include -I /build/source/llvm/include -D _DEBUG -D _GLIBCXX_ASSERTIONS -D _GNU_SOURCE -D _LIBCPP_ENABLE_ASSERTIONS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-17/lib/clang/17/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include 
-fmacro-prefix-map=/build/source/build-llvm=build-llvm -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm=build-llvm -fcoverage-prefix-map=/build/source/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm -fdebug-prefix-map=/build/source/build-llvm=build-llvm -fdebug-prefix-map=/build/source/= -fdebug-prefix-map=/build/source/build-llvm=build-llvm -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2023-05-10-133810-16478-1 -x c++ /build/source/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp

/build/source/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp

1///===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
10#include "llvm/ADT/DenseMap.h"
11#include "llvm/ADT/STLExtras.h"
12#include "llvm/ADT/Sequence.h"
13#include "llvm/ADT/SetVector.h"
14#include "llvm/ADT/SmallPtrSet.h"
15#include "llvm/ADT/SmallVector.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/ADT/Twine.h"
18#include "llvm/Analysis/AssumptionCache.h"
19#include "llvm/Analysis/BlockFrequencyInfo.h"
20#include "llvm/Analysis/CFG.h"
21#include "llvm/Analysis/CodeMetrics.h"
22#include "llvm/Analysis/DomTreeUpdater.h"
23#include "llvm/Analysis/GuardUtils.h"
24#include "llvm/Analysis/LoopAnalysisManager.h"
25#include "llvm/Analysis/LoopInfo.h"
26#include "llvm/Analysis/LoopIterator.h"
27#include "llvm/Analysis/LoopPass.h"
28#include "llvm/Analysis/MemorySSA.h"
29#include "llvm/Analysis/MemorySSAUpdater.h"
30#include "llvm/Analysis/MustExecute.h"
31#include "llvm/Analysis/ProfileSummaryInfo.h"
32#include "llvm/Analysis/ScalarEvolution.h"
33#include "llvm/Analysis/TargetTransformInfo.h"
34#include "llvm/Analysis/ValueTracking.h"
35#include "llvm/IR/BasicBlock.h"
36#include "llvm/IR/Constant.h"
37#include "llvm/IR/Constants.h"
38#include "llvm/IR/Dominators.h"
39#include "llvm/IR/Function.h"
40#include "llvm/IR/IRBuilder.h"
41#include "llvm/IR/InstrTypes.h"
42#include "llvm/IR/Instruction.h"
43#include "llvm/IR/Instructions.h"
44#include "llvm/IR/IntrinsicInst.h"
45#include "llvm/IR/PatternMatch.h"
46#include "llvm/IR/ProfDataUtils.h"
47#include "llvm/IR/Use.h"
48#include "llvm/IR/Value.h"
49#include "llvm/InitializePasses.h"
50#include "llvm/Pass.h"
51#include "llvm/Support/Casting.h"
52#include "llvm/Support/CommandLine.h"
53#include "llvm/Support/Debug.h"
54#include "llvm/Support/ErrorHandling.h"
55#include "llvm/Support/GenericDomTree.h"
56#include "llvm/Support/InstructionCost.h"
57#include "llvm/Support/raw_ostream.h"
58#include "llvm/Transforms/Scalar/LoopPassManager.h"
59#include "llvm/Transforms/Utils/BasicBlockUtils.h"
60#include "llvm/Transforms/Utils/Cloning.h"
61#include "llvm/Transforms/Utils/Local.h"
62#include "llvm/Transforms/Utils/LoopUtils.h"
63#include "llvm/Transforms/Utils/ValueMapper.h"
64#include <algorithm>
65#include <cassert>
66#include <iterator>
67#include <numeric>
68#include <optional>
69#include <utility>
70
71#define DEBUG_TYPE"simple-loop-unswitch" "simple-loop-unswitch"
72
73using namespace llvm;
74using namespace llvm::PatternMatch;
75
76STATISTIC(NumBranches, "Number of branches unswitched")static llvm::Statistic NumBranches = {"simple-loop-unswitch",
"NumBranches", "Number of branches unswitched"}
;
77STATISTIC(NumSwitches, "Number of switches unswitched")static llvm::Statistic NumSwitches = {"simple-loop-unswitch",
"NumSwitches", "Number of switches unswitched"}
;
78STATISTIC(NumSelects, "Number of selects turned into branches for unswitching")static llvm::Statistic NumSelects = {"simple-loop-unswitch", "NumSelects"
, "Number of selects turned into branches for unswitching"}
;
79STATISTIC(NumGuards, "Number of guards turned into branches for unswitching")static llvm::Statistic NumGuards = {"simple-loop-unswitch", "NumGuards"
, "Number of guards turned into branches for unswitching"}
;
80STATISTIC(NumTrivial, "Number of unswitches that are trivial")static llvm::Statistic NumTrivial = {"simple-loop-unswitch", "NumTrivial"
, "Number of unswitches that are trivial"}
;
81STATISTIC(static llvm::Statistic NumCostMultiplierSkipped = {"simple-loop-unswitch"
, "NumCostMultiplierSkipped", "Number of unswitch candidates that had their cost multiplier skipped"
}
82 NumCostMultiplierSkipped,static llvm::Statistic NumCostMultiplierSkipped = {"simple-loop-unswitch"
, "NumCostMultiplierSkipped", "Number of unswitch candidates that had their cost multiplier skipped"
}
83 "Number of unswitch candidates that had their cost multiplier skipped")static llvm::Statistic NumCostMultiplierSkipped = {"simple-loop-unswitch"
, "NumCostMultiplierSkipped", "Number of unswitch candidates that had their cost multiplier skipped"
}
;
84STATISTIC(NumInvariantConditionsInjected,static llvm::Statistic NumInvariantConditionsInjected = {"simple-loop-unswitch"
, "NumInvariantConditionsInjected", "Number of invariant conditions injected and unswitched"
}
85 "Number of invariant conditions injected and unswitched")static llvm::Statistic NumInvariantConditionsInjected = {"simple-loop-unswitch"
, "NumInvariantConditionsInjected", "Number of invariant conditions injected and unswitched"
}
;
86
87static cl::opt<bool> EnableNonTrivialUnswitch(
88 "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
89 cl::desc("Forcibly enables non-trivial loop unswitching rather than "
90 "following the configuration passed into the pass."));
91
92static cl::opt<int>
93 UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
94 cl::desc("The cost threshold for unswitching a loop."));
95
96static cl::opt<bool> EnableUnswitchCostMultiplier(
97 "enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden,
98 cl::desc("Enable unswitch cost multiplier that prohibits exponential "
99 "explosion in nontrivial unswitch."));
100static cl::opt<int> UnswitchSiblingsToplevelDiv(
101 "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden,
102 cl::desc("Toplevel siblings divisor for cost multiplier."));
103static cl::opt<int> UnswitchNumInitialUnscaledCandidates(
104 "unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden,
105 cl::desc("Number of unswitch candidates that are ignored when calculating "
106 "cost multiplier."));
107static cl::opt<bool> UnswitchGuards(
108 "simple-loop-unswitch-guards", cl::init(true), cl::Hidden,
109 cl::desc("If enabled, simple loop unswitching will also consider "
110 "llvm.experimental.guard intrinsics as unswitch candidates."));
111static cl::opt<bool> DropNonTrivialImplicitNullChecks(
112 "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
113 cl::init(false), cl::Hidden,
114 cl::desc("If enabled, drop make.implicit metadata in unswitched implicit "
115 "null checks to save time analyzing if we can keep it."));
116static cl::opt<unsigned>
117 MSSAThreshold("simple-loop-unswitch-memoryssa-threshold",
118 cl::desc("Max number of memory uses to explore during "
119 "partial unswitching analysis"),
120 cl::init(100), cl::Hidden);
121static cl::opt<bool> FreezeLoopUnswitchCond(
122 "freeze-loop-unswitch-cond", cl::init(true), cl::Hidden,
123 cl::desc("If enabled, the freeze instruction will be added to condition "
124 "of loop unswitch to prevent miscompilation."));
125
126static cl::opt<bool> InjectInvariantConditions(
127 "simple-loop-unswitch-inject-invariant-conditions", cl::Hidden,
128 cl::desc("Whether we should inject new invariants and unswitch them to "
129 "eliminate some existing (non-invariant) conditions."),
130 cl::init(true));
131
132static cl::opt<unsigned> InjectInvariantConditionHotnesThreshold(
133 "simple-loop-unswitch-inject-invariant-condition-hotness-threshold",
134 cl::Hidden, cl::desc("Only try to inject loop invariant conditions and "
135 "unswitch on them to eliminate branches that are "
136 "not-taken 1/<this option> times or less."),
137 cl::init(16));
138
139namespace {
140struct CompareDesc {
141 BranchInst *Term;
142 Value *Invariant;
143 BasicBlock *InLoopSucc;
144
145 CompareDesc(BranchInst *Term, Value *Invariant, BasicBlock *InLoopSucc)
146 : Term(Term), Invariant(Invariant), InLoopSucc(InLoopSucc) {}
147};
148
149struct InjectedInvariant {
150 ICmpInst::Predicate Pred;
151 Value *LHS;
152 Value *RHS;
153 BasicBlock *InLoopSucc;
154
155 InjectedInvariant(ICmpInst::Predicate Pred, Value *LHS, Value *RHS,
156 BasicBlock *InLoopSucc)
157 : Pred(Pred), LHS(LHS), RHS(RHS), InLoopSucc(InLoopSucc) {}
158};
159
160struct NonTrivialUnswitchCandidate {
161 Instruction *TI = nullptr;
162 TinyPtrVector<Value *> Invariants;
163 std::optional<InstructionCost> Cost;
164 std::optional<InjectedInvariant> PendingInjection;
165 NonTrivialUnswitchCandidate(
166 Instruction *TI, ArrayRef<Value *> Invariants,
167 std::optional<InstructionCost> Cost = std::nullopt,
168 std::optional<InjectedInvariant> PendingInjection = std::nullopt)
169 : TI(TI), Invariants(Invariants), Cost(Cost),
170 PendingInjection(PendingInjection) {};
171
172 bool hasPendingInjection() const { return PendingInjection.has_value(); }
173};
174} // end anonymous namespace.
175
176// Helper to skip (select x, true, false), which matches both a logical AND and
177// OR and can confuse code that tries to determine if \p Cond is either a
178// logical AND or OR but not both.
179static Value *skipTrivialSelect(Value *Cond) {
180 Value *CondNext;
181 while (match(Cond, m_Select(m_Value(CondNext), m_One(), m_Zero())))
182 Cond = CondNext;
183 return Cond;
184}
185
186/// Collect all of the loop invariant input values transitively used by the
187/// homogeneous instruction graph from a given root.
188///
189/// This essentially walks from a root recursively through loop variant operands
190/// which have perform the same logical operation (AND or OR) and finds all
191/// inputs which are loop invariant. For some operations these can be
192/// re-associated and unswitched out of the loop entirely.
193static TinyPtrVector<Value *>
194collectHomogenousInstGraphLoopInvariants(const Loop &L, Instruction &Root,
195 const LoopInfo &LI) {
196 assert(!L.isLoopInvariant(&Root) &&(static_cast <bool> (!L.isLoopInvariant(&Root) &&
"Only need to walk the graph if root itself is not invariant."
) ? void (0) : __assert_fail ("!L.isLoopInvariant(&Root) && \"Only need to walk the graph if root itself is not invariant.\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 197, __extension__
__PRETTY_FUNCTION__))
197 "Only need to walk the graph if root itself is not invariant.")(static_cast <bool> (!L.isLoopInvariant(&Root) &&
"Only need to walk the graph if root itself is not invariant."
) ? void (0) : __assert_fail ("!L.isLoopInvariant(&Root) && \"Only need to walk the graph if root itself is not invariant.\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 197, __extension__
__PRETTY_FUNCTION__))
;
198 TinyPtrVector<Value *> Invariants;
199
200 bool IsRootAnd = match(&Root, m_LogicalAnd());
201 bool IsRootOr = match(&Root, m_LogicalOr());
202
203 // Build a worklist and recurse through operators collecting invariants.
204 SmallVector<Instruction *, 4> Worklist;
205 SmallPtrSet<Instruction *, 8> Visited;
206 Worklist.push_back(&Root);
207 Visited.insert(&Root);
208 do {
209 Instruction &I = *Worklist.pop_back_val();
210 for (Value *OpV : I.operand_values()) {
211 // Skip constants as unswitching isn't interesting for them.
212 if (isa<Constant>(OpV))
213 continue;
214
215 // Add it to our result if loop invariant.
216 if (L.isLoopInvariant(OpV)) {
217 Invariants.push_back(OpV);
218 continue;
219 }
220
221 // If not an instruction with the same opcode, nothing we can do.
222 Instruction *OpI = dyn_cast<Instruction>(skipTrivialSelect(OpV));
223
224 if (OpI && ((IsRootAnd && match(OpI, m_LogicalAnd())) ||
225 (IsRootOr && match(OpI, m_LogicalOr())))) {
226 // Visit this operand.
227 if (Visited.insert(OpI).second)
228 Worklist.push_back(OpI);
229 }
230 }
231 } while (!Worklist.empty());
232
233 return Invariants;
234}
235
236static void replaceLoopInvariantUses(const Loop &L, Value *Invariant,
237 Constant &Replacement) {
238 assert(!isa<Constant>(Invariant) && "Why are we unswitching on a constant?")(static_cast <bool> (!isa<Constant>(Invariant) &&
"Why are we unswitching on a constant?") ? void (0) : __assert_fail
("!isa<Constant>(Invariant) && \"Why are we unswitching on a constant?\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 238, __extension__
__PRETTY_FUNCTION__))
;
239
240 // Replace uses of LIC in the loop with the given constant.
241 // We use make_early_inc_range as set invalidates the iterator.
242 for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
243 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
244
245 // Replace this use within the loop body.
246 if (UserI && L.contains(UserI))
247 U.set(&Replacement);
248 }
249}
250
251/// Check that all the LCSSA PHI nodes in the loop exit block have trivial
252/// incoming values along this edge.
253static bool areLoopExitPHIsLoopInvariant(const Loop &L,
254 const BasicBlock &ExitingBB,
255 const BasicBlock &ExitBB) {
256 for (const Instruction &I : ExitBB) {
257 auto *PN = dyn_cast<PHINode>(&I);
258 if (!PN)
259 // No more PHIs to check.
260 return true;
261
262 // If the incoming value for this edge isn't loop invariant the unswitch
263 // won't be trivial.
264 if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
265 return false;
266 }
267 llvm_unreachable("Basic blocks should never be empty!")::llvm::llvm_unreachable_internal("Basic blocks should never be empty!"
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 267)
;
268}
269
270/// Copy a set of loop invariant values \p ToDuplicate and insert them at the
271/// end of \p BB and conditionally branch on the copied condition. We only
272/// branch on a single value.
273static void buildPartialUnswitchConditionalBranch(
274 BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
275 BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze,
276 const Instruction *I, AssumptionCache *AC, const DominatorTree &DT) {
277 IRBuilder<> IRB(&BB);
278
279 SmallVector<Value *> FrozenInvariants;
280 for (Value *Inv : Invariants) {
281 if (InsertFreeze && !isGuaranteedNotToBeUndefOrPoison(Inv, AC, I, &DT))
282 Inv = IRB.CreateFreeze(Inv, Inv->getName() + ".fr");
283 FrozenInvariants.push_back(Inv);
284 }
285
286 Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants)
287 : IRB.CreateAnd(FrozenInvariants);
288 IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
289 Direction ? &NormalSucc : &UnswitchedSucc);
290}
291
292/// Copy a set of loop invariant values, and conditionally branch on them.
293static void buildPartialInvariantUnswitchConditionalBranch(
294 BasicBlock &BB, ArrayRef<Value *> ToDuplicate, bool Direction,
295 BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
296 MemorySSAUpdater *MSSAU) {
297 ValueToValueMapTy VMap;
298 for (auto *Val : reverse(ToDuplicate)) {
299 Instruction *Inst = cast<Instruction>(Val);
300 Instruction *NewInst = Inst->clone();
301 NewInst->insertInto(&BB, BB.end());
302 RemapInstruction(NewInst, VMap,
303 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
304 VMap[Val] = NewInst;
305
306 if (!MSSAU)
307 continue;
308
309 MemorySSA *MSSA = MSSAU->getMemorySSA();
310 if (auto *MemUse =
311 dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(Inst))) {
312 auto *DefiningAccess = MemUse->getDefiningAccess();
313 // Get the first defining access before the loop.
314 while (L.contains(DefiningAccess->getBlock())) {
315 // If the defining access is a MemoryPhi, get the incoming
316 // value for the pre-header as defining access.
317 if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))
318 DefiningAccess =
319 MemPhi->getIncomingValueForBlock(L.getLoopPreheader());
320 else
321 DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
322 }
323 MSSAU->createMemoryAccessInBB(NewInst, DefiningAccess,
324 NewInst->getParent(),
325 MemorySSA::BeforeTerminator);
326 }
327 }
328
329 IRBuilder<> IRB(&BB);
330 Value *Cond = VMap[ToDuplicate[0]];
331 IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
332 Direction ? &NormalSucc : &UnswitchedSucc);
333}
334
335/// Rewrite the PHI nodes in an unswitched loop exit basic block.
336///
337/// Requires that the loop exit and unswitched basic block are the same, and
338/// that the exiting block was a unique predecessor of that block. Rewrites the
339/// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
340/// PHI nodes from the old preheader that now contains the unswitched
341/// terminator.
342static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB,
343 BasicBlock &OldExitingBB,
344 BasicBlock &OldPH) {
345 for (PHINode &PN : UnswitchedBB.phis()) {
346 // When the loop exit is directly unswitched we just need to update the
347 // incoming basic block. We loop to handle weird cases with repeated
348 // incoming blocks, but expect to typically only have one operand here.
349 for (auto i : seq<int>(0, PN.getNumOperands())) {
350 assert(PN.getIncomingBlock(i) == &OldExitingBB &&(static_cast <bool> (PN.getIncomingBlock(i) == &OldExitingBB
&& "Found incoming block different from unique predecessor!"
) ? void (0) : __assert_fail ("PN.getIncomingBlock(i) == &OldExitingBB && \"Found incoming block different from unique predecessor!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 351, __extension__
__PRETTY_FUNCTION__))
351 "Found incoming block different from unique predecessor!")(static_cast <bool> (PN.getIncomingBlock(i) == &OldExitingBB
&& "Found incoming block different from unique predecessor!"
) ? void (0) : __assert_fail ("PN.getIncomingBlock(i) == &OldExitingBB && \"Found incoming block different from unique predecessor!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 351, __extension__
__PRETTY_FUNCTION__))
;
352 PN.setIncomingBlock(i, &OldPH);
353 }
354 }
355}
356
357/// Rewrite the PHI nodes in the loop exit basic block and the split off
358/// unswitched block.
359///
360/// Because the exit block remains an exit from the loop, this rewrites the
361/// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
362/// nodes into the unswitched basic block to select between the value in the
363/// old preheader and the loop exit.
364static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB,
365 BasicBlock &UnswitchedBB,
366 BasicBlock &OldExitingBB,
367 BasicBlock &OldPH,
368 bool FullUnswitch) {
369 assert(&ExitBB != &UnswitchedBB &&(static_cast <bool> (&ExitBB != &UnswitchedBB &&
"Must have different loop exit and unswitched blocks!") ? void
(0) : __assert_fail ("&ExitBB != &UnswitchedBB && \"Must have different loop exit and unswitched blocks!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 370, __extension__
__PRETTY_FUNCTION__))
370 "Must have different loop exit and unswitched blocks!")(static_cast <bool> (&ExitBB != &UnswitchedBB &&
"Must have different loop exit and unswitched blocks!") ? void
(0) : __assert_fail ("&ExitBB != &UnswitchedBB && \"Must have different loop exit and unswitched blocks!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 370, __extension__
__PRETTY_FUNCTION__))
;
371 Instruction *InsertPt = &*UnswitchedBB.begin();
372 for (PHINode &PN : ExitBB.phis()) {
373 auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
374 PN.getName() + ".split", InsertPt);
375
376 // Walk backwards over the old PHI node's inputs to minimize the cost of
377 // removing each one. We have to do this weird loop manually so that we
378 // create the same number of new incoming edges in the new PHI as we expect
379 // each case-based edge to be included in the unswitched switch in some
380 // cases.
381 // FIXME: This is really, really gross. It would be much cleaner if LLVM
382 // allowed us to create a single entry for a predecessor block without
383 // having separate entries for each "edge" even though these edges are
384 // required to produce identical results.
385 for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
386 if (PN.getIncomingBlock(i) != &OldExitingBB)
387 continue;
388
389 Value *Incoming = PN.getIncomingValue(i);
390 if (FullUnswitch)
391 // No more edge from the old exiting block to the exit block.
392 PN.removeIncomingValue(i);
393
394 NewPN->addIncoming(Incoming, &OldPH);
395 }
396
397 // Now replace the old PHI with the new one and wire the old one in as an
398 // input to the new one.
399 PN.replaceAllUsesWith(NewPN);
400 NewPN->addIncoming(&PN, &ExitBB);
401 }
402}
403
404/// Hoist the current loop up to the innermost loop containing a remaining exit.
405///
406/// Because we've removed an exit from the loop, we may have changed the set of
407/// loops reachable and need to move the current loop up the loop nest or even
408/// to an entirely separate nest.
409static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
410 DominatorTree &DT, LoopInfo &LI,
411 MemorySSAUpdater *MSSAU, ScalarEvolution *SE) {
412 // If the loop is already at the top level, we can't hoist it anywhere.
413 Loop *OldParentL = L.getParentLoop();
414 if (!OldParentL)
415 return;
416
417 SmallVector<BasicBlock *, 4> Exits;
418 L.getExitBlocks(Exits);
419 Loop *NewParentL = nullptr;
420 for (auto *ExitBB : Exits)
421 if (Loop *ExitL = LI.getLoopFor(ExitBB))
422 if (!NewParentL || NewParentL->contains(ExitL))
423 NewParentL = ExitL;
424
425 if (NewParentL == OldParentL)
426 return;
427
428 // The new parent loop (if different) should always contain the old one.
429 if (NewParentL)
430 assert(NewParentL->contains(OldParentL) &&(static_cast <bool> (NewParentL->contains(OldParentL
) && "Can only hoist this loop up the nest!") ? void (
0) : __assert_fail ("NewParentL->contains(OldParentL) && \"Can only hoist this loop up the nest!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 431, __extension__
__PRETTY_FUNCTION__))
431 "Can only hoist this loop up the nest!")(static_cast <bool> (NewParentL->contains(OldParentL
) && "Can only hoist this loop up the nest!") ? void (
0) : __assert_fail ("NewParentL->contains(OldParentL) && \"Can only hoist this loop up the nest!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 431, __extension__
__PRETTY_FUNCTION__))
;
432
433 // The preheader will need to move with the body of this loop. However,
434 // because it isn't in this loop we also need to update the primary loop map.
435 assert(OldParentL == LI.getLoopFor(&Preheader) &&(static_cast <bool> (OldParentL == LI.getLoopFor(&Preheader
) && "Parent loop of this loop should contain this loop's preheader!"
) ? void (0) : __assert_fail ("OldParentL == LI.getLoopFor(&Preheader) && \"Parent loop of this loop should contain this loop's preheader!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 436, __extension__
__PRETTY_FUNCTION__))
436 "Parent loop of this loop should contain this loop's preheader!")(static_cast <bool> (OldParentL == LI.getLoopFor(&Preheader
) && "Parent loop of this loop should contain this loop's preheader!"
) ? void (0) : __assert_fail ("OldParentL == LI.getLoopFor(&Preheader) && \"Parent loop of this loop should contain this loop's preheader!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 436, __extension__
__PRETTY_FUNCTION__))
;
437 LI.changeLoopFor(&Preheader, NewParentL);
438
439 // Remove this loop from its old parent.
440 OldParentL->removeChildLoop(&L);
441
442 // Add the loop either to the new parent or as a top-level loop.
443 if (NewParentL)
444 NewParentL->addChildLoop(&L);
445 else
446 LI.addTopLevelLoop(&L);
447
448 // Remove this loops blocks from the old parent and every other loop up the
449 // nest until reaching the new parent. Also update all of these
450 // no-longer-containing loops to reflect the nesting change.
451 for (Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
452 OldContainingL = OldContainingL->getParentLoop()) {
453 llvm::erase_if(OldContainingL->getBlocksVector(),
454 [&](const BasicBlock *BB) {
455 return BB == &Preheader || L.contains(BB);
456 });
457
458 OldContainingL->getBlocksSet().erase(&Preheader);
459 for (BasicBlock *BB : L.blocks())
460 OldContainingL->getBlocksSet().erase(BB);
461
462 // Because we just hoisted a loop out of this one, we have essentially
463 // created new exit paths from it. That means we need to form LCSSA PHI
464 // nodes for values used in the no-longer-nested loop.
465 formLCSSA(*OldContainingL, DT, &LI);
466
467 // We shouldn't need to form dedicated exits because the exit introduced
468 // here is the (just split by unswitching) preheader. However, after trivial
469 // unswitching it is possible to get new non-dedicated exits out of parent
470 // loop so let's conservatively form dedicated exit blocks and figure out
471 // if we can optimize later.
472 formDedicatedExitBlocks(OldContainingL, &DT, &LI, MSSAU,
473 /*PreserveLCSSA*/ true);
474 }
475}
476
477// Return the top-most loop containing ExitBB and having ExitBB as exiting block
478// or the loop containing ExitBB, if there is no parent loop containing ExitBB
479// as exiting block.
480static Loop *getTopMostExitingLoop(const BasicBlock *ExitBB,
481 const LoopInfo &LI) {
482 Loop *TopMost = LI.getLoopFor(ExitBB);
483 Loop *Current = TopMost;
484 while (Current) {
485 if (Current->isLoopExiting(ExitBB))
486 TopMost = Current;
487 Current = Current->getParentLoop();
488 }
489 return TopMost;
490}
491
492/// Unswitch a trivial branch if the condition is loop invariant.
493///
494/// This routine should only be called when loop code leading to the branch has
495/// been validated as trivial (no side effects). This routine checks if the
496/// condition is invariant and one of the successors is a loop exit. This
497/// allows us to unswitch without duplicating the loop, making it trivial.
498///
499/// If this routine fails to unswitch the branch it returns false.
500///
501/// If the branch can be unswitched, this routine splits the preheader and
502/// hoists the branch above that split. Preserves loop simplified form
503/// (splitting the exit block as necessary). It simplifies the branch within
504/// the loop to an unconditional branch but doesn't remove it entirely. Further
505/// cleanup can be done with some simplifycfg like pass.
506///
507/// If `SE` is not null, it will be updated based on the potential loop SCEVs
508/// invalidated by this.
509static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
510 LoopInfo &LI, ScalarEvolution *SE,
511 MemorySSAUpdater *MSSAU) {
512 assert(BI.isConditional() && "Can only unswitch a conditional branch!")(static_cast <bool> (BI.isConditional() && "Can only unswitch a conditional branch!"
) ? void (0) : __assert_fail ("BI.isConditional() && \"Can only unswitch a conditional branch!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 512, __extension__
__PRETTY_FUNCTION__))
;
513 LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Trying to unswitch branch: "
<< BI << "\n"; } } while (false)
;
514
515 // The loop invariant values that we want to unswitch.
516 TinyPtrVector<Value *> Invariants;
517
518 // When true, we're fully unswitching the branch rather than just unswitching
519 // some input conditions to the branch.
520 bool FullUnswitch = false;
521
522 Value *Cond = skipTrivialSelect(BI.getCondition());
523 if (L.isLoopInvariant(Cond)) {
524 Invariants.push_back(Cond);
525 FullUnswitch = true;
526 } else {
527 if (auto *CondInst = dyn_cast<Instruction>(Cond))
528 Invariants = collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI);
529 if (Invariants.empty()) {
530 LLVM_DEBUG(dbgs() << " Couldn't find invariant inputs!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Couldn't find invariant inputs!\n"
; } } while (false)
;
531 return false;
532 }
533 }
534
535 // Check that one of the branch's successors exits, and which one.
536 bool ExitDirection = true;
537 int LoopExitSuccIdx = 0;
538 auto *LoopExitBB = BI.getSuccessor(0);
539 if (L.contains(LoopExitBB)) {
540 ExitDirection = false;
541 LoopExitSuccIdx = 1;
542 LoopExitBB = BI.getSuccessor(1);
543 if (L.contains(LoopExitBB)) {
544 LLVM_DEBUG(dbgs() << " Branch doesn't exit the loop!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Branch doesn't exit the loop!\n"
; } } while (false)
;
545 return false;
546 }
547 }
548 auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
549 auto *ParentBB = BI.getParent();
550 if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB)) {
551 LLVM_DEBUG(dbgs() << " Loop exit PHI's aren't loop-invariant!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Loop exit PHI's aren't loop-invariant!\n"
; } } while (false)
;
552 return false;
553 }
554
555 // When unswitching only part of the branch's condition, we need the exit
556 // block to be reached directly from the partially unswitched input. This can
557 // be done when the exit block is along the true edge and the branch condition
558 // is a graph of `or` operations, or the exit block is along the false edge
559 // and the condition is a graph of `and` operations.
560 if (!FullUnswitch) {
561 if (ExitDirection ? !match(Cond, m_LogicalOr())
562 : !match(Cond, m_LogicalAnd())) {
563 LLVM_DEBUG(dbgs() << " Branch condition is in improper form for "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Branch condition is in improper form for "
"non-full unswitch!\n"; } } while (false)
564 "non-full unswitch!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Branch condition is in improper form for "
"non-full unswitch!\n"; } } while (false)
;
565 return false;
566 }
567 }
568
569 LLVM_DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: "
<< BI << "\n"; for (Value *Invariant : Invariants
) { dbgs() << " " << *Invariant << " == true"
; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs
() << "\n"; } }; } } while (false)
570 dbgs() << " unswitching trivial invariant conditions for: " << BIdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: "
<< BI << "\n"; for (Value *Invariant : Invariants
) { dbgs() << " " << *Invariant << " == true"
; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs
() << "\n"; } }; } } while (false)
571 << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: "
<< BI << "\n"; for (Value *Invariant : Invariants
) { dbgs() << " " << *Invariant << " == true"
; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs
() << "\n"; } }; } } while (false)
572 for (Value *Invariant : Invariants) {do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: "
<< BI << "\n"; for (Value *Invariant : Invariants
) { dbgs() << " " << *Invariant << " == true"
; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs
() << "\n"; } }; } } while (false)
573 dbgs() << " " << *Invariant << " == true";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: "
<< BI << "\n"; for (Value *Invariant : Invariants
) { dbgs() << " " << *Invariant << " == true"
; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs
() << "\n"; } }; } } while (false)
574 if (Invariant != Invariants.back())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: "
<< BI << "\n"; for (Value *Invariant : Invariants
) { dbgs() << " " << *Invariant << " == true"
; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs
() << "\n"; } }; } } while (false)
575 dbgs() << " ||";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: "
<< BI << "\n"; for (Value *Invariant : Invariants
) { dbgs() << " " << *Invariant << " == true"
; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs
() << "\n"; } }; } } while (false)
576 dbgs() << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: "
<< BI << "\n"; for (Value *Invariant : Invariants
) { dbgs() << " " << *Invariant << " == true"
; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs
() << "\n"; } }; } } while (false)
577 }do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: "
<< BI << "\n"; for (Value *Invariant : Invariants
) { dbgs() << " " << *Invariant << " == true"
; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs
() << "\n"; } }; } } while (false)
578 })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { { dbgs() << " unswitching trivial invariant conditions for: "
<< BI << "\n"; for (Value *Invariant : Invariants
) { dbgs() << " " << *Invariant << " == true"
; if (Invariant != Invariants.back()) dbgs() << " ||"; dbgs
() << "\n"; } }; } } while (false)
;
579
580 // If we have scalar evolutions, we need to invalidate them including this
581 // loop, the loop containing the exit block and the topmost parent loop
582 // exiting via LoopExitBB.
583 if (SE) {
584 if (const Loop *ExitL = getTopMostExitingLoop(LoopExitBB, LI))
585 SE->forgetLoop(ExitL);
586 else
587 // Forget the entire nest as this exits the entire nest.
588 SE->forgetTopmostLoop(&L);
589 SE->forgetBlockAndLoopDispositions();
590 }
591
592 if (MSSAU && VerifyMemorySSA)
593 MSSAU->getMemorySSA()->verifyMemorySSA();
594
595 // Split the preheader, so that we know that there is a safe place to insert
596 // the conditional branch. We will change the preheader to have a conditional
597 // branch on LoopCond.
598 BasicBlock *OldPH = L.getLoopPreheader();
599 BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
600
601 // Now that we have a place to insert the conditional branch, create a place
602 // to branch to: this is the exit block out of the loop that we are
603 // unswitching. We need to split this if there are other loop predecessors.
604 // Because the loop is in simplified form, *any* other predecessor is enough.
605 BasicBlock *UnswitchedBB;
606 if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
607 assert(LoopExitBB->getUniquePredecessor() == BI.getParent() &&(static_cast <bool> (LoopExitBB->getUniquePredecessor
() == BI.getParent() && "A branch's parent isn't a predecessor!"
) ? void (0) : __assert_fail ("LoopExitBB->getUniquePredecessor() == BI.getParent() && \"A branch's parent isn't a predecessor!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 608, __extension__
__PRETTY_FUNCTION__))
608 "A branch's parent isn't a predecessor!")(static_cast <bool> (LoopExitBB->getUniquePredecessor
() == BI.getParent() && "A branch's parent isn't a predecessor!"
) ? void (0) : __assert_fail ("LoopExitBB->getUniquePredecessor() == BI.getParent() && \"A branch's parent isn't a predecessor!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 608, __extension__
__PRETTY_FUNCTION__))
;
609 UnswitchedBB = LoopExitBB;
610 } else {
611 UnswitchedBB =
612 SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI, MSSAU);
613 }
614
615 if (MSSAU && VerifyMemorySSA)
616 MSSAU->getMemorySSA()->verifyMemorySSA();
617
618 // Actually move the invariant uses into the unswitched position. If possible,
619 // we do this by moving the instructions, but when doing partial unswitching
620 // we do it by building a new merge of the values in the unswitched position.
621 OldPH->getTerminator()->eraseFromParent();
622 if (FullUnswitch) {
623 // If fully unswitching, we can use the existing branch instruction.
624 // Splice it into the old PH to gate reaching the new preheader and re-point
625 // its successors.
626 OldPH->splice(OldPH->end(), BI.getParent(), BI.getIterator());
627 BI.setCondition(Cond);
628 if (MSSAU) {
629 // Temporarily clone the terminator, to make MSSA update cheaper by
630 // separating "insert edge" updates from "remove edge" ones.
631 BI.clone()->insertInto(ParentBB, ParentBB->end());
632 } else {
633 // Create a new unconditional branch that will continue the loop as a new
634 // terminator.
635 BranchInst::Create(ContinueBB, ParentBB);
636 }
637 BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
638 BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
639 } else {
640 // Only unswitching a subset of inputs to the condition, so we will need to
641 // build a new branch that merges the invariant inputs.
642 if (ExitDirection)
643 assert(match(skipTrivialSelect(BI.getCondition()), m_LogicalOr()) &&(static_cast <bool> (match(skipTrivialSelect(BI.getCondition
()), m_LogicalOr()) && "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
"condition!") ? void (0) : __assert_fail ("match(skipTrivialSelect(BI.getCondition()), m_LogicalOr()) && \"Must have an `or` of `i1`s or `select i1 X, true, Y`s for the \" \"condition!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 645, __extension__
__PRETTY_FUNCTION__))
644 "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "(static_cast <bool> (match(skipTrivialSelect(BI.getCondition
()), m_LogicalOr()) && "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
"condition!") ? void (0) : __assert_fail ("match(skipTrivialSelect(BI.getCondition()), m_LogicalOr()) && \"Must have an `or` of `i1`s or `select i1 X, true, Y`s for the \" \"condition!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 645, __extension__
__PRETTY_FUNCTION__))
645 "condition!")(static_cast <bool> (match(skipTrivialSelect(BI.getCondition
()), m_LogicalOr()) && "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
"condition!") ? void (0) : __assert_fail ("match(skipTrivialSelect(BI.getCondition()), m_LogicalOr()) && \"Must have an `or` of `i1`s or `select i1 X, true, Y`s for the \" \"condition!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 645, __extension__
__PRETTY_FUNCTION__))
;
646 else
647 assert(match(skipTrivialSelect(BI.getCondition()), m_LogicalAnd()) &&(static_cast <bool> (match(skipTrivialSelect(BI.getCondition
()), m_LogicalAnd()) && "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
" condition!") ? void (0) : __assert_fail ("match(skipTrivialSelect(BI.getCondition()), m_LogicalAnd()) && \"Must have an `and` of `i1`s or `select i1 X, Y, false`s for the\" \" condition!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 649, __extension__
__PRETTY_FUNCTION__))
648 "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"(static_cast <bool> (match(skipTrivialSelect(BI.getCondition
()), m_LogicalAnd()) && "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
" condition!") ? void (0) : __assert_fail ("match(skipTrivialSelect(BI.getCondition()), m_LogicalAnd()) && \"Must have an `and` of `i1`s or `select i1 X, Y, false`s for the\" \" condition!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 649, __extension__
__PRETTY_FUNCTION__))
649 " condition!")(static_cast <bool> (match(skipTrivialSelect(BI.getCondition
()), m_LogicalAnd()) && "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
" condition!") ? void (0) : __assert_fail ("match(skipTrivialSelect(BI.getCondition()), m_LogicalAnd()) && \"Must have an `and` of `i1`s or `select i1 X, Y, false`s for the\" \" condition!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 649, __extension__
__PRETTY_FUNCTION__))
;
650 buildPartialUnswitchConditionalBranch(
651 *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
652 FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT);
653 }
654
655 // Update the dominator tree with the added edge.
656 DT.insertEdge(OldPH, UnswitchedBB);
657
658 // After the dominator tree was updated with the added edge, update MemorySSA
659 // if available.
660 if (MSSAU) {
661 SmallVector<CFGUpdate, 1> Updates;
662 Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
663 MSSAU->applyInsertUpdates(Updates, DT);
664 }
665
666 // Finish updating dominator tree and memory ssa for full unswitch.
667 if (FullUnswitch) {
668 if (MSSAU) {
669 // Remove the cloned branch instruction.
670 ParentBB->getTerminator()->eraseFromParent();
671 // Create unconditional branch now.
672 BranchInst::Create(ContinueBB, ParentBB);
673 MSSAU->removeEdge(ParentBB, LoopExitBB);
674 }
675 DT.deleteEdge(ParentBB, LoopExitBB);
676 }
677
678 if (MSSAU && VerifyMemorySSA)
679 MSSAU->getMemorySSA()->verifyMemorySSA();
680
681 // Rewrite the relevant PHI nodes.
682 if (UnswitchedBB == LoopExitBB)
683 rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
684 else
685 rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
686 *ParentBB, *OldPH, FullUnswitch);
687
688 // The constant we can replace all of our invariants with inside the loop
689 // body. If any of the invariants have a value other than this the loop won't
690 // be entered.
691 ConstantInt *Replacement = ExitDirection
692 ? ConstantInt::getFalse(BI.getContext())
693 : ConstantInt::getTrue(BI.getContext());
694
695 // Since this is an i1 condition we can also trivially replace uses of it
696 // within the loop with a constant.
697 for (Value *Invariant : Invariants)
698 replaceLoopInvariantUses(L, Invariant, *Replacement);
699
700 // If this was full unswitching, we may have changed the nesting relationship
701 // for this loop so hoist it to its correct parent if needed.
702 if (FullUnswitch)
703 hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
704
705 if (MSSAU && VerifyMemorySSA)
706 MSSAU->getMemorySSA()->verifyMemorySSA();
707
708 LLVM_DEBUG(dbgs() << " done: unswitching trivial branch...\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " done: unswitching trivial branch...\n"
; } } while (false)
;
709 ++NumTrivial;
710 ++NumBranches;
711 return true;
712}
713
/// Unswitch a trivial switch if the condition is loop invariant.
///
/// This routine should only be called when loop code leading to the switch has
/// been validated as trivial (no side effects). This routine checks if the
/// condition is invariant and that at least one of the successors is a loop
/// exit. This allows us to unswitch without duplicating the loop, making it
/// trivial.
///
/// If this routine fails to unswitch the switch it returns false.
///
/// If the switch can be unswitched, this routine splits the preheader and
/// copies the switch above that split. If the default case is one of the
/// exiting cases, it copies the non-exiting cases and points them at the new
/// preheader. If the default case is not exiting, it copies the exiting cases
/// and points the default at the preheader. It preserves loop simplified form
/// (splitting the exit blocks as necessary). It simplifies the switch within
/// the loop by removing now-dead cases. If the default case is one of those
/// unswitched, it replaces its destination with a new basic block containing
/// only unreachable. Such basic blocks, while technically loop exits, are not
/// considered for unswitching so this is a stable transform and the same
/// switch will not be revisited. If after unswitching there is only a single
/// in-loop successor, the switch is further simplified to an unconditional
/// branch. Still more cleanup can be done with some simplifycfg like pass.
///
/// If `SE` is not null, it will be updated based on the potential loop SCEVs
/// invalidated by this.
740static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
741 LoopInfo &LI, ScalarEvolution *SE,
742 MemorySSAUpdater *MSSAU) {
743 LLVM_DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Trying to unswitch switch: "
<< SI << "\n"; } } while (false)
;
744 Value *LoopCond = SI.getCondition();
745
746 // If this isn't switching on an invariant condition, we can't unswitch it.
747 if (!L.isLoopInvariant(LoopCond))
748 return false;
749
750 auto *ParentBB = SI.getParent();
751
752 // The same check must be used both for the default and the exit cases. We
753 // should never leave edges from the switch instruction to a basic block that
754 // we are unswitching, hence the condition used to determine the default case
755 // needs to also be used to populate ExitCaseIndices, which is then used to
756 // remove cases from the switch.
757 auto IsTriviallyUnswitchableExitBlock = [&](BasicBlock &BBToCheck) {
758 // BBToCheck is not an exit block if it is inside loop L.
759 if (L.contains(&BBToCheck))
760 return false;
761 // BBToCheck is not trivial to unswitch if its phis aren't loop invariant.
762 if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, BBToCheck))
763 return false;
764 // We do not unswitch a block that only has an unreachable statement, as
765 // it's possible this is a previously unswitched block. Only unswitch if
766 // either the terminator is not unreachable, or, if it is, it's not the only
767 // instruction in the block.
768 auto *TI = BBToCheck.getTerminator();
769 bool isUnreachable = isa<UnreachableInst>(TI);
770 return !isUnreachable ||
771 (isUnreachable && (BBToCheck.getFirstNonPHIOrDbg() != TI));
772 };
773
774 SmallVector<int, 4> ExitCaseIndices;
775 for (auto Case : SI.cases())
776 if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))
777 ExitCaseIndices.push_back(Case.getCaseIndex());
778 BasicBlock *DefaultExitBB = nullptr;
779 SwitchInstProfUpdateWrapper::CaseWeightOpt DefaultCaseWeight =
780 SwitchInstProfUpdateWrapper::getSuccessorWeight(SI, 0);
781 if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) {
782 DefaultExitBB = SI.getDefaultDest();
783 } else if (ExitCaseIndices.empty())
784 return false;
785
786 LLVM_DEBUG(dbgs() << " unswitching trivial switch...\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " unswitching trivial switch...\n"
; } } while (false)
;
787
788 if (MSSAU && VerifyMemorySSA)
789 MSSAU->getMemorySSA()->verifyMemorySSA();
790
791 // We may need to invalidate SCEVs for the outermost loop reached by any of
792 // the exits.
793 Loop *OuterL = &L;
794
795 if (DefaultExitBB) {
796 // Check the loop containing this exit.
797 Loop *ExitL = getTopMostExitingLoop(DefaultExitBB, LI);
798 if (!ExitL || ExitL->contains(OuterL))
799 OuterL = ExitL;
800 }
801 for (unsigned Index : ExitCaseIndices) {
802 auto CaseI = SI.case_begin() + Index;
803 // Compute the outer loop from this exit.
804 Loop *ExitL = getTopMostExitingLoop(CaseI->getCaseSuccessor(), LI);
805 if (!ExitL || ExitL->contains(OuterL))
806 OuterL = ExitL;
807 }
808
809 if (SE) {
810 if (OuterL)
811 SE->forgetLoop(OuterL);
812 else
813 SE->forgetTopmostLoop(&L);
814 }
815
816 if (DefaultExitBB) {
817 // Clear out the default destination temporarily to allow accurate
818 // predecessor lists to be examined below.
819 SI.setDefaultDest(nullptr);
820 }
821
822 // Store the exit cases into a separate data structure and remove them from
823 // the switch.
824 SmallVector<std::tuple<ConstantInt *, BasicBlock *,
825 SwitchInstProfUpdateWrapper::CaseWeightOpt>,
826 4> ExitCases;
827 ExitCases.reserve(ExitCaseIndices.size());
828 SwitchInstProfUpdateWrapper SIW(SI);
829 // We walk the case indices backwards so that we remove the last case first
830 // and don't disrupt the earlier indices.
831 for (unsigned Index : reverse(ExitCaseIndices)) {
832 auto CaseI = SI.case_begin() + Index;
833 // Save the value of this case.
834 auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex());
835 ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
836 // Delete the unswitched cases.
837 SIW.removeCase(CaseI);
838 }
839
840 // Check if after this all of the remaining cases point at the same
841 // successor.
842 BasicBlock *CommonSuccBB = nullptr;
843 if (SI.getNumCases() > 0 &&
844 all_of(drop_begin(SI.cases()), [&SI](const SwitchInst::CaseHandle &Case) {
845 return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
846 }))
847 CommonSuccBB = SI.case_begin()->getCaseSuccessor();
848 if (!DefaultExitBB) {
849 // If we're not unswitching the default, we need it to match any cases to
850 // have a common successor or if we have no cases it is the common
851 // successor.
852 if (SI.getNumCases() == 0)
853 CommonSuccBB = SI.getDefaultDest();
854 else if (SI.getDefaultDest() != CommonSuccBB)
855 CommonSuccBB = nullptr;
856 }
857
858 // Split the preheader, so that we know that there is a safe place to insert
859 // the switch.
860 BasicBlock *OldPH = L.getLoopPreheader();
861 BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
862 OldPH->getTerminator()->eraseFromParent();
863
864 // Now add the unswitched switch.
865 auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
866 SwitchInstProfUpdateWrapper NewSIW(*NewSI);
867
868 // Rewrite the IR for the unswitched basic blocks. This requires two steps.
869 // First, we split any exit blocks with remaining in-loop predecessors. Then
870 // we update the PHIs in one of two ways depending on if there was a split.
871 // We walk in reverse so that we split in the same order as the cases
872 // appeared. This is purely for convenience of reading the resulting IR, but
873 // it doesn't cost anything really.
874 SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
875 SmallDenseMap<BasicBlock *, BasicBlock *, 2> SplitExitBBMap;
876 // Handle the default exit if necessary.
877 // FIXME: It'd be great if we could merge this with the loop below but LLVM's
878 // ranges aren't quite powerful enough yet.
879 if (DefaultExitBB) {
880 if (pred_empty(DefaultExitBB)) {
881 UnswitchedExitBBs.insert(DefaultExitBB);
882 rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
883 } else {
884 auto *SplitBB =
885 SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI, MSSAU);
886 rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
887 *ParentBB, *OldPH,
888 /*FullUnswitch*/ true);
889 DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
890 }
891 }
892 // Note that we must use a reference in the for loop so that we update the
893 // container.
894 for (auto &ExitCase : reverse(ExitCases)) {
895 // Grab a reference to the exit block in the pair so that we can update it.
896 BasicBlock *ExitBB = std::get<1>(ExitCase);
897
898 // If this case is the last edge into the exit block, we can simply reuse it
899 // as it will no longer be a loop exit. No mapping necessary.
900 if (pred_empty(ExitBB)) {
901 // Only rewrite once.
902 if (UnswitchedExitBBs.insert(ExitBB).second)
903 rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
904 continue;
905 }
906
907 // Otherwise we need to split the exit block so that we retain an exit
908 // block from the loop and a target for the unswitched condition.
909 BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
910 if (!SplitExitBB) {
911 // If this is the first time we see this, do the split and remember it.
912 SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
913 rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
914 *ParentBB, *OldPH,
915 /*FullUnswitch*/ true);
916 }
917 // Update the case pair to point to the split block.
918 std::get<1>(ExitCase) = SplitExitBB;
919 }
920
921 // Now add the unswitched cases. We do this in reverse order as we built them
922 // in reverse order.
923 for (auto &ExitCase : reverse(ExitCases)) {
924 ConstantInt *CaseVal = std::get<0>(ExitCase);
925 BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
926
927 NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
928 }
929
930 // If the default was unswitched, re-point it and add explicit cases for
931 // entering the loop.
932 if (DefaultExitBB) {
933 NewSIW->setDefaultDest(DefaultExitBB);
934 NewSIW.setSuccessorWeight(0, DefaultCaseWeight);
935
936 // We removed all the exit cases, so we just copy the cases to the
937 // unswitched switch.
938 for (const auto &Case : SI.cases())
939 NewSIW.addCase(Case.getCaseValue(), NewPH,
940 SIW.getSuccessorWeight(Case.getSuccessorIndex()));
941 } else if (DefaultCaseWeight) {
942 // We have to set branch weight of the default case.
943 uint64_t SW = *DefaultCaseWeight;
944 for (const auto &Case : SI.cases()) {
945 auto W = SIW.getSuccessorWeight(Case.getSuccessorIndex());
946 assert(W &&(static_cast <bool> (W && "case weight must be defined as default case weight is defined"
) ? void (0) : __assert_fail ("W && \"case weight must be defined as default case weight is defined\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 947, __extension__
__PRETTY_FUNCTION__))
947 "case weight must be defined as default case weight is defined")(static_cast <bool> (W && "case weight must be defined as default case weight is defined"
) ? void (0) : __assert_fail ("W && \"case weight must be defined as default case weight is defined\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 947, __extension__
__PRETTY_FUNCTION__))
;
948 SW += *W;
949 }
950 NewSIW.setSuccessorWeight(0, SW);
951 }
952
953 // If we ended up with a common successor for every path through the switch
954 // after unswitching, rewrite it to an unconditional branch to make it easy
955 // to recognize. Otherwise we potentially have to recognize the default case
956 // pointing at unreachable and other complexity.
957 if (CommonSuccBB) {
958 BasicBlock *BB = SI.getParent();
959 // We may have had multiple edges to this common successor block, so remove
960 // them as predecessors. We skip the first one, either the default or the
961 // actual first case.
962 bool SkippedFirst = DefaultExitBB == nullptr;
963 for (auto Case : SI.cases()) {
964 assert(Case.getCaseSuccessor() == CommonSuccBB &&(static_cast <bool> (Case.getCaseSuccessor() == CommonSuccBB
&& "Non-common successor!") ? void (0) : __assert_fail
("Case.getCaseSuccessor() == CommonSuccBB && \"Non-common successor!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 965, __extension__
__PRETTY_FUNCTION__))
965 "Non-common successor!")(static_cast <bool> (Case.getCaseSuccessor() == CommonSuccBB
&& "Non-common successor!") ? void (0) : __assert_fail
("Case.getCaseSuccessor() == CommonSuccBB && \"Non-common successor!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 965, __extension__
__PRETTY_FUNCTION__))
;
966 (void)Case;
967 if (!SkippedFirst) {
968 SkippedFirst = true;
969 continue;
970 }
971 CommonSuccBB->removePredecessor(BB,
972 /*KeepOneInputPHIs*/ true);
973 }
974 // Now nuke the switch and replace it with a direct branch.
975 SIW.eraseFromParent();
976 BranchInst::Create(CommonSuccBB, BB);
977 } else if (DefaultExitBB) {
978 assert(SI.getNumCases() > 0 &&(static_cast <bool> (SI.getNumCases() > 0 &&
"If we had no cases we'd have a common successor!") ? void (
0) : __assert_fail ("SI.getNumCases() > 0 && \"If we had no cases we'd have a common successor!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 979, __extension__
__PRETTY_FUNCTION__))
979 "If we had no cases we'd have a common successor!")(static_cast <bool> (SI.getNumCases() > 0 &&
"If we had no cases we'd have a common successor!") ? void (
0) : __assert_fail ("SI.getNumCases() > 0 && \"If we had no cases we'd have a common successor!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 979, __extension__
__PRETTY_FUNCTION__))
;
980 // Move the last case to the default successor. This is valid as if the
981 // default got unswitched it cannot be reached. This has the advantage of
982 // being simple and keeping the number of edges from this switch to
983 // successors the same, and avoiding any PHI update complexity.
984 auto LastCaseI = std::prev(SI.case_end());
985
986 SI.setDefaultDest(LastCaseI->getCaseSuccessor());
987 SIW.setSuccessorWeight(
988 0, SIW.getSuccessorWeight(LastCaseI->getSuccessorIndex()));
989 SIW.removeCase(LastCaseI);
990 }
991
992 // Walk the unswitched exit blocks and the unswitched split blocks and update
993 // the dominator tree based on the CFG edits. While we are walking unordered
994 // containers here, the API for applyUpdates takes an unordered list of
995 // updates and requires them to not contain duplicates.
996 SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
997 for (auto *UnswitchedExitBB : UnswitchedExitBBs) {
998 DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB});
999 DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});
1000 }
1001 for (auto SplitUnswitchedPair : SplitExitBBMap) {
1002 DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first});
1003 DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second});
1004 }
1005
1006 if (MSSAU) {
1007 MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
1008 if (VerifyMemorySSA)
1009 MSSAU->getMemorySSA()->verifyMemorySSA();
1010 } else {
1011 DT.applyUpdates(DTUpdates);
1012 }
1013
1014 assert(DT.verify(DominatorTree::VerificationLevel::Fast))(static_cast <bool> (DT.verify(DominatorTree::VerificationLevel
::Fast)) ? void (0) : __assert_fail ("DT.verify(DominatorTree::VerificationLevel::Fast)"
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1014, __extension__
__PRETTY_FUNCTION__))
;
1015
1016 // We may have changed the nesting relationship for this loop so hoist it to
1017 // its correct parent if needed.
1018 hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
1019
1020 if (MSSAU && VerifyMemorySSA)
1021 MSSAU->getMemorySSA()->verifyMemorySSA();
1022
1023 ++NumTrivial;
1024 ++NumSwitches;
1025 LLVM_DEBUG(dbgs() << " done: unswitching trivial switch...\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " done: unswitching trivial switch...\n"
; } } while (false)
;
1026 return true;
1027}
1028
/// This routine scans the loop to find a branch or switch which occurs before
/// any side effects occur. These can potentially be unswitched without
/// duplicating the loop. If a branch or switch is successfully unswitched the
/// scanning continues to see if subsequent branches or switches have become
/// trivial. Once all trivial candidates have been unswitched, this routine
/// returns.
///
/// The return value indicates whether anything was unswitched (and therefore
/// changed).
///
/// If `SE` is not null, it will be updated based on the potential loop SCEVs
/// invalidated by this.
1041static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
1042 LoopInfo &LI, ScalarEvolution *SE,
1043 MemorySSAUpdater *MSSAU) {
1044 bool Changed = false;
1045
1046 // If loop header has only one reachable successor we should keep looking for
1047 // trivial condition candidates in the successor as well. An alternative is
1048 // to constant fold conditions and merge successors into loop header (then we
1049 // only need to check header's terminator). The reason for not doing this in
1050 // LoopUnswitch pass is that it could potentially break LoopPassManager's
1051 // invariants. Folding dead branches could either eliminate the current loop
1052 // or make other loops unreachable. LCSSA form might also not be preserved
1053 // after deleting branches. The following code keeps traversing loop header's
1054 // successors until it finds the trivial condition candidate (condition that
1055 // is not a constant). Since unswitching generates branches with constant
1056 // conditions, this scenario could be very common in practice.
1057 BasicBlock *CurrentBB = L.getHeader();
1058 SmallPtrSet<BasicBlock *, 8> Visited;
1059 Visited.insert(CurrentBB);
1060 do {
1061 // Check if there are any side-effecting instructions (e.g. stores, calls,
1062 // volatile loads) in the part of the loop that the code *would* execute
1063 // without unswitching.
1064 if (MSSAU) // Possible early exit with MSSA
1065 if (auto *Defs = MSSAU->getMemorySSA()->getBlockDefs(CurrentBB))
1066 if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
1067 return Changed;
1068 if (llvm::any_of(*CurrentBB,
1069 [](Instruction &I) { return I.mayHaveSideEffects(); }))
1070 return Changed;
1071
1072 Instruction *CurrentTerm = CurrentBB->getTerminator();
1073
1074 if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
1075 // Don't bother trying to unswitch past a switch with a constant
1076 // condition. This should be removed prior to running this pass by
1077 // simplifycfg.
1078 if (isa<Constant>(SI->getCondition()))
1079 return Changed;
1080
1081 if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU))
1082 // Couldn't unswitch this one so we're done.
1083 return Changed;
1084
1085 // Mark that we managed to unswitch something.
1086 Changed = true;
1087
1088 // If unswitching turned the terminator into an unconditional branch then
1089 // we can continue. The unswitching logic specifically works to fold any
1090 // cases it can into an unconditional branch to make it easier to
1091 // recognize here.
1092 auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
1093 if (!BI || BI->isConditional())
1094 return Changed;
1095
1096 CurrentBB = BI->getSuccessor(0);
1097 continue;
1098 }
1099
1100 auto *BI = dyn_cast<BranchInst>(CurrentTerm);
1101 if (!BI)
1102 // We do not understand other terminator instructions.
1103 return Changed;
1104
1105 // Don't bother trying to unswitch past an unconditional branch or a branch
1106 // with a constant value. These should be removed by simplifycfg prior to
1107 // running this pass.
1108 if (!BI->isConditional() ||
1109 isa<Constant>(skipTrivialSelect(BI->getCondition())))
1110 return Changed;
1111
1112 // Found a trivial condition candidate: non-foldable conditional branch. If
1113 // we fail to unswitch this, we can't do anything else that is trivial.
1114 if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU))
1115 return Changed;
1116
1117 // Mark that we managed to unswitch something.
1118 Changed = true;
1119
1120 // If we only unswitched some of the conditions feeding the branch, we won't
1121 // have collapsed it to a single successor.
1122 BI = cast<BranchInst>(CurrentBB->getTerminator());
1123 if (BI->isConditional())
1124 return Changed;
1125
1126 // Follow the newly unconditional branch into its successor.
1127 CurrentBB = BI->getSuccessor(0);
1128
1129 // When continuing, if we exit the loop or reach a previous visited block,
1130 // then we can not reach any trivial condition candidates (unfoldable
1131 // branch instructions or switch instructions) and no unswitch can happen.
1132 } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
1133
1134 return Changed;
1135}
1136
1137/// Build the cloned blocks for an unswitched copy of the given loop.
1138///
1139/// The cloned blocks are inserted before the loop preheader (`LoopPH`) and
1140/// after the split block (`SplitBB`) that will be used to select between the
1141/// cloned and original loop.
1142///
1143/// This routine handles cloning all of the necessary loop blocks and exit
1144/// blocks including rewriting their instructions and the relevant PHI nodes.
1145/// Any loop blocks or exit blocks which are dominated by a different successor
1146/// than the one for this clone of the loop blocks can be trivially skipped. We
1147/// use the `DominatingSucc` map to determine whether a block satisfies that
1148/// property with a simple map lookup.
1149///
1150/// It also correctly creates the unconditional branch in the cloned
1151/// unswitched parent block to only point at the unswitched successor.
1152///
1153/// This does not handle most of the necessary updates to `LoopInfo`. Only exit
1154/// block splitting is correctly reflected in `LoopInfo`, essentially all of
1155/// the cloned blocks (and their loops) are left without full `LoopInfo`
1156/// updates. This also doesn't fully update `DominatorTree`. It adds the cloned
1157/// blocks to them but doesn't create the cloned `DominatorTree` structure and
1158/// instead the caller must recompute an accurate DT. It *does* correctly
1159/// update the `AssumptionCache` provided in `AC`.
1160static BasicBlock *buildClonedLoopBlocks(
1161 Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB,
1162 ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
1163 BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
1164 const SmallDenseMap<BasicBlock *, BasicBlock *, 16> &DominatingSucc,
1165 ValueToValueMapTy &VMap,
1166 SmallVectorImpl<DominatorTree::UpdateType> &DTUpdates, AssumptionCache &AC,
1167 DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU,
1168 ScalarEvolution *SE) {
1169 SmallVector<BasicBlock *, 4> NewBlocks;
1170 NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
1171
1172 // We will need to clone a bunch of blocks, wrap up the clone operation in
1173 // a helper.
1174 auto CloneBlock = [&](BasicBlock *OldBB) {
1175 // Clone the basic block and insert it before the new preheader.
1176 BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent());
1177 NewBB->moveBefore(LoopPH);
1178
1179 // Record this block and the mapping.
1180 NewBlocks.push_back(NewBB);
1181 VMap[OldBB] = NewBB;
1182
1183 return NewBB;
1184 };
1185
1186 // We skip cloning blocks when they have a dominating succ that is not the
1187 // succ we are cloning for.
1188 auto SkipBlock = [&](BasicBlock *BB) {
1189 auto It = DominatingSucc.find(BB);
1190 return It != DominatingSucc.end() && It->second != UnswitchedSuccBB;
1191 };
1192
1193 // First, clone the preheader.
1194 auto *ClonedPH = CloneBlock(LoopPH);
1195
1196 // Then clone all the loop blocks, skipping the ones that aren't necessary.
1197 for (auto *LoopBB : L.blocks())
1198 if (!SkipBlock(LoopBB))
1199 CloneBlock(LoopBB);
1200
1201 // Split all the loop exit edges so that when we clone the exit blocks, if
1202 // any of the exit blocks are *also* a preheader for some other loop, we
1203 // don't create multiple predecessors entering the loop header.
1204 for (auto *ExitBB : ExitBlocks) {
1205 if (SkipBlock(ExitBB))
1206 continue;
1207
1208 // When we are going to clone an exit, we don't need to clone all the
1209 // instructions in the exit block and we want to ensure we have an easy
1210 // place to merge the CFG, so split the exit first. This is always safe to
1211 // do because there cannot be any non-loop predecessors of a loop exit in
1212 // loop simplified form.
1213 auto *MergeBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
1214
1215 // Rearrange the names to make it easier to write test cases by having the
1216 // exit block carry the suffix rather than the merge block carrying the
1217 // suffix.
1218 MergeBB->takeName(ExitBB);
1219 ExitBB->setName(Twine(MergeBB->getName()) + ".split");
1220
1221 // Now clone the original exit block.
1222 auto *ClonedExitBB = CloneBlock(ExitBB);
1223 assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&(static_cast <bool> (ClonedExitBB->getTerminator()->
getNumSuccessors() == 1 && "Exit block should have been split to have one successor!"
) ? void (0) : __assert_fail ("ClonedExitBB->getTerminator()->getNumSuccessors() == 1 && \"Exit block should have been split to have one successor!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1224, __extension__
__PRETTY_FUNCTION__))
1224 "Exit block should have been split to have one successor!")(static_cast <bool> (ClonedExitBB->getTerminator()->
getNumSuccessors() == 1 && "Exit block should have been split to have one successor!"
) ? void (0) : __assert_fail ("ClonedExitBB->getTerminator()->getNumSuccessors() == 1 && \"Exit block should have been split to have one successor!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1224, __extension__
__PRETTY_FUNCTION__))
;
1225 assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&(static_cast <bool> (ClonedExitBB->getTerminator()->
getSuccessor(0) == MergeBB && "Cloned exit block has the wrong successor!"
) ? void (0) : __assert_fail ("ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB && \"Cloned exit block has the wrong successor!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1226, __extension__
__PRETTY_FUNCTION__))
1226 "Cloned exit block has the wrong successor!")(static_cast <bool> (ClonedExitBB->getTerminator()->
getSuccessor(0) == MergeBB && "Cloned exit block has the wrong successor!"
) ? void (0) : __assert_fail ("ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB && \"Cloned exit block has the wrong successor!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1226, __extension__
__PRETTY_FUNCTION__))
;
1227
1228 // Remap any cloned instructions and create a merge phi node for them.
1229 for (auto ZippedInsts : llvm::zip_first(
1230 llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
1231 llvm::make_range(ClonedExitBB->begin(),
1232 std::prev(ClonedExitBB->end())))) {
1233 Instruction &I = std::get<0>(ZippedInsts);
1234 Instruction &ClonedI = std::get<1>(ZippedInsts);
1235
1236 // The only instructions in the exit block should be PHI nodes and
1237 // potentially a landing pad.
1238 assert((static_cast <bool> ((isa<PHINode>(I) || isa<LandingPadInst
>(I) || isa<CatchPadInst>(I)) && "Bad instruction in exit block!"
) ? void (0) : __assert_fail ("(isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) && \"Bad instruction in exit block!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1240, __extension__
__PRETTY_FUNCTION__))
1239 (isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) &&(static_cast <bool> ((isa<PHINode>(I) || isa<LandingPadInst
>(I) || isa<CatchPadInst>(I)) && "Bad instruction in exit block!"
) ? void (0) : __assert_fail ("(isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) && \"Bad instruction in exit block!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1240, __extension__
__PRETTY_FUNCTION__))
1240 "Bad instruction in exit block!")(static_cast <bool> ((isa<PHINode>(I) || isa<LandingPadInst
>(I) || isa<CatchPadInst>(I)) && "Bad instruction in exit block!"
) ? void (0) : __assert_fail ("(isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) && \"Bad instruction in exit block!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1240, __extension__
__PRETTY_FUNCTION__))
;
1241 // We should have a value map between the instruction and its clone.
1242 assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!")(static_cast <bool> (VMap.lookup(&I) == &ClonedI
&& "Mismatch in the value map!") ? void (0) : __assert_fail
("VMap.lookup(&I) == &ClonedI && \"Mismatch in the value map!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1242, __extension__
__PRETTY_FUNCTION__))
;
1243
1244 // Forget SCEVs based on exit phis in case SCEV looked through the phi.
1245 if (SE && isa<PHINode>(I))
1246 SE->forgetValue(&I);
1247
1248 auto *MergePN =
1249 PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi",
1250 &*MergeBB->getFirstInsertionPt());
1251 I.replaceAllUsesWith(MergePN);
1252 MergePN->addIncoming(&I, ExitBB);
1253 MergePN->addIncoming(&ClonedI, ClonedExitBB);
1254 }
1255 }
1256
1257 // Rewrite the instructions in the cloned blocks to refer to the instructions
1258 // in the cloned blocks. We have to do this as a second pass so that we have
1259 // everything available. Also, we have inserted new instructions which may
1260 // include assume intrinsics, so we update the assumption cache while
1261 // processing this.
1262 for (auto *ClonedBB : NewBlocks)
1263 for (Instruction &I : *ClonedBB) {
1264 RemapInstruction(&I, VMap,
1265 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1266 if (auto *II = dyn_cast<AssumeInst>(&I))
1267 AC.registerAssumption(II);
1268 }
1269
1270 // Update any PHI nodes in the cloned successors of the skipped blocks to not
1271 // have spurious incoming values.
1272 for (auto *LoopBB : L.blocks())
1273 if (SkipBlock(LoopBB))
1274 for (auto *SuccBB : successors(LoopBB))
1275 if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
1276 for (PHINode &PN : ClonedSuccBB->phis())
1277 PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);
1278
1279 // Remove the cloned parent as a predecessor of any successor we ended up
1280 // cloning other than the unswitched one.
1281 auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
1282 for (auto *SuccBB : successors(ParentBB)) {
1283 if (SuccBB == UnswitchedSuccBB)
1284 continue;
1285
1286 auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB));
1287 if (!ClonedSuccBB)
1288 continue;
1289
1290 ClonedSuccBB->removePredecessor(ClonedParentBB,
1291 /*KeepOneInputPHIs*/ true);
1292 }
1293
1294 // Replace the cloned branch with an unconditional branch to the cloned
1295 // unswitched successor.
1296 auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
1297 Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
1298 // Trivial Simplification. If Terminator is a conditional branch and
1299 // condition becomes dead - erase it.
1300 Value *ClonedConditionToErase = nullptr;
1301 if (auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
1302 ClonedConditionToErase = BI->getCondition();
1303 else if (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
1304 ClonedConditionToErase = SI->getCondition();
1305
1306 ClonedTerminator->eraseFromParent();
1307 BranchInst::Create(ClonedSuccBB, ClonedParentBB);
1308
1309 if (ClonedConditionToErase)
1310 RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr,
1311 MSSAU);
1312
1313 // If there are duplicate entries in the PHI nodes because of multiple edges
1314 // to the unswitched successor, we need to nuke all but one as we replaced it
1315 // with a direct branch.
1316 for (PHINode &PN : ClonedSuccBB->phis()) {
1317 bool Found = false;
1318 // Loop over the incoming operands backwards so we can easily delete as we
1319 // go without invalidating the index.
1320 for (int i = PN.getNumOperands() - 1; i >= 0; --i) {
1321 if (PN.getIncomingBlock(i) != ClonedParentBB)
1322 continue;
1323 if (!Found) {
1324 Found = true;
1325 continue;
1326 }
1327 PN.removeIncomingValue(i, /*DeletePHIIfEmpty*/ false);
1328 }
1329 }
1330
1331 // Record the domtree updates for the new blocks.
1332 SmallPtrSet<BasicBlock *, 4> SuccSet;
1333 for (auto *ClonedBB : NewBlocks) {
1334 for (auto *SuccBB : successors(ClonedBB))
1335 if (SuccSet.insert(SuccBB).second)
1336 DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB});
1337 SuccSet.clear();
1338 }
1339
1340 return ClonedPH;
1341}
1342
1343/// Recursively clone the specified loop and all of its children.
1344///
1345/// The target parent loop for the clone should be provided, or can be null if
1346/// the clone is a top-level loop. While cloning, all the blocks are mapped
1347/// with the provided value map. The entire original loop must be present in
1348/// the value map. The cloned loop is returned.
1349static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
1350 const ValueToValueMapTy &VMap, LoopInfo &LI) {
1351 auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) {
1352 assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!")(static_cast <bool> (ClonedL.getBlocks().empty() &&
"Must start with an empty loop!") ? void (0) : __assert_fail
("ClonedL.getBlocks().empty() && \"Must start with an empty loop!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1352, __extension__
__PRETTY_FUNCTION__))
;
1353 ClonedL.reserveBlocks(OrigL.getNumBlocks());
1354 for (auto *BB : OrigL.blocks()) {
1355 auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB));
1356 ClonedL.addBlockEntry(ClonedBB);
1357 if (LI.getLoopFor(BB) == &OrigL)
1358 LI.changeLoopFor(ClonedBB, &ClonedL);
1359 }
1360 };
1361
1362 // We specially handle the first loop because it may get cloned into
1363 // a different parent and because we most commonly are cloning leaf loops.
1364 Loop *ClonedRootL = LI.AllocateLoop();
1365 if (RootParentL)
1366 RootParentL->addChildLoop(ClonedRootL);
1367 else
1368 LI.addTopLevelLoop(ClonedRootL);
1369 AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
1370
1371 if (OrigRootL.isInnermost())
1372 return ClonedRootL;
1373
1374 // If we have a nest, we can quickly clone the entire loop nest using an
1375 // iterative approach because it is a tree. We keep the cloned parent in the
1376 // data structure to avoid repeatedly querying through a map to find it.
1377 SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone;
1378 // Build up the loops to clone in reverse order as we'll clone them from the
1379 // back.
1380 for (Loop *ChildL : llvm::reverse(OrigRootL))
1381 LoopsToClone.push_back({ClonedRootL, ChildL});
1382 do {
1383 Loop *ClonedParentL, *L;
1384 std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();
1385 Loop *ClonedL = LI.AllocateLoop();
1386 ClonedParentL->addChildLoop(ClonedL);
1387 AddClonedBlocksToLoop(*L, *ClonedL);
1388 for (Loop *ChildL : llvm::reverse(*L))
1389 LoopsToClone.push_back({ClonedL, ChildL});
1390 } while (!LoopsToClone.empty());
1391
1392 return ClonedRootL;
1393}
1394
1395/// Build the cloned loops of an original loop from unswitching.
1396///
1397/// Because unswitching simplifies the CFG of the loop, this isn't a trivial
1398/// operation. We need to re-verify that there even is a loop (as the backedge
1399/// may not have been cloned), and even if there are remaining backedges the
1400/// backedge set may be different. However, we know that each child loop is
1401/// undisturbed, we only need to find where to place each child loop within
1402/// either any parent loop or within a cloned version of the original loop.
1403///
1404/// Because child loops may end up cloned outside of any cloned version of the
1405/// original loop, multiple cloned sibling loops may be created. All of them
1406/// are returned so that the newly introduced loop nest roots can be
1407/// identified.
1408static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
1409 const ValueToValueMapTy &VMap, LoopInfo &LI,
1410 SmallVectorImpl<Loop *> &NonChildClonedLoops) {
1411 Loop *ClonedL = nullptr;
1412
1413 auto *OrigPH = OrigL.getLoopPreheader();
1414 auto *OrigHeader = OrigL.getHeader();
1415
1416 auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));
1417 auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));
1418
1419 // We need to know the loops of the cloned exit blocks to even compute the
1420 // accurate parent loop. If we only clone exits to some parent of the
1421 // original parent, we want to clone into that outer loop. We also keep track
1422 // of the loops that our cloned exit blocks participate in.
1423 Loop *ParentL = nullptr;
1424 SmallVector<BasicBlock *, 4> ClonedExitsInLoops;
1425 SmallDenseMap<BasicBlock *, Loop *, 16> ExitLoopMap;
1426 ClonedExitsInLoops.reserve(ExitBlocks.size());
1427 for (auto *ExitBB : ExitBlocks)
1428 if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))
1429 if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1430 ExitLoopMap[ClonedExitBB] = ExitL;
1431 ClonedExitsInLoops.push_back(ClonedExitBB);
1432 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1433 ParentL = ExitL;
1434 }
1435 assert((!ParentL || ParentL == OrigL.getParentLoop() ||(static_cast <bool> ((!ParentL || ParentL == OrigL.getParentLoop
() || ParentL->contains(OrigL.getParentLoop())) &&
"The computed parent loop should always contain (or be) the parent of "
"the original loop.") ? void (0) : __assert_fail ("(!ParentL || ParentL == OrigL.getParentLoop() || ParentL->contains(OrigL.getParentLoop())) && \"The computed parent loop should always contain (or be) the parent of \" \"the original loop.\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1438, __extension__
__PRETTY_FUNCTION__))
1436 ParentL->contains(OrigL.getParentLoop())) &&(static_cast <bool> ((!ParentL || ParentL == OrigL.getParentLoop
() || ParentL->contains(OrigL.getParentLoop())) &&
"The computed parent loop should always contain (or be) the parent of "
"the original loop.") ? void (0) : __assert_fail ("(!ParentL || ParentL == OrigL.getParentLoop() || ParentL->contains(OrigL.getParentLoop())) && \"The computed parent loop should always contain (or be) the parent of \" \"the original loop.\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1438, __extension__
__PRETTY_FUNCTION__))
1437 "The computed parent loop should always contain (or be) the parent of "(static_cast <bool> ((!ParentL || ParentL == OrigL.getParentLoop
() || ParentL->contains(OrigL.getParentLoop())) &&
"The computed parent loop should always contain (or be) the parent of "
"the original loop.") ? void (0) : __assert_fail ("(!ParentL || ParentL == OrigL.getParentLoop() || ParentL->contains(OrigL.getParentLoop())) && \"The computed parent loop should always contain (or be) the parent of \" \"the original loop.\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1438, __extension__
__PRETTY_FUNCTION__))
1438 "the original loop.")(static_cast <bool> ((!ParentL || ParentL == OrigL.getParentLoop
() || ParentL->contains(OrigL.getParentLoop())) &&
"The computed parent loop should always contain (or be) the parent of "
"the original loop.") ? void (0) : __assert_fail ("(!ParentL || ParentL == OrigL.getParentLoop() || ParentL->contains(OrigL.getParentLoop())) && \"The computed parent loop should always contain (or be) the parent of \" \"the original loop.\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1438, __extension__
__PRETTY_FUNCTION__))
;
1439
1440 // We build the set of blocks dominated by the cloned header from the set of
1441 // cloned blocks out of the original loop. While not all of these will
1442 // necessarily be in the cloned loop, it is enough to establish that they
1443 // aren't in unreachable cycles, etc.
1444 SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;
1445 for (auto *BB : OrigL.blocks())
1446 if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))
1447 ClonedLoopBlocks.insert(ClonedBB);
1448
1449 // Rebuild the set of blocks that will end up in the cloned loop. We may have
1450 // skipped cloning some region of this loop which can in turn skip some of
1451 // the backedges so we have to rebuild the blocks in the loop based on the
1452 // backedges that remain after cloning.
1453 SmallVector<BasicBlock *, 16> Worklist;
1454 SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;
1455 for (auto *Pred : predecessors(ClonedHeader)) {
1456 // The only possible non-loop header predecessor is the preheader because
1457 // we know we cloned the loop in simplified form.
1458 if (Pred == ClonedPH)
1459 continue;
1460
1461 // Because the loop was in simplified form, the only non-loop predecessor
1462 // should be the preheader.
1463 assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "(static_cast <bool> (ClonedLoopBlocks.count(Pred) &&
"Found a predecessor of the loop " "header other than the preheader "
"that is not part of the loop!") ? void (0) : __assert_fail (
"ClonedLoopBlocks.count(Pred) && \"Found a predecessor of the loop \" \"header other than the preheader \" \"that is not part of the loop!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1465, __extension__
__PRETTY_FUNCTION__))
1464 "header other than the preheader "(static_cast <bool> (ClonedLoopBlocks.count(Pred) &&
"Found a predecessor of the loop " "header other than the preheader "
"that is not part of the loop!") ? void (0) : __assert_fail (
"ClonedLoopBlocks.count(Pred) && \"Found a predecessor of the loop \" \"header other than the preheader \" \"that is not part of the loop!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1465, __extension__
__PRETTY_FUNCTION__))
1465 "that is not part of the loop!")(static_cast <bool> (ClonedLoopBlocks.count(Pred) &&
"Found a predecessor of the loop " "header other than the preheader "
"that is not part of the loop!") ? void (0) : __assert_fail (
"ClonedLoopBlocks.count(Pred) && \"Found a predecessor of the loop \" \"header other than the preheader \" \"that is not part of the loop!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1465, __extension__
__PRETTY_FUNCTION__))
;
1466
1467 // Insert this block into the loop set and on the first visit (and if it
1468 // isn't the header we're currently walking) put it into the worklist to
1469 // recurse through.
1470 if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)
1471 Worklist.push_back(Pred);
1472 }
1473
1474 // If we had any backedges then there *is* a cloned loop. Put the header into
1475 // the loop set and then walk the worklist backwards to find all the blocks
1476 // that remain within the loop after cloning.
1477 if (!BlocksInClonedLoop.empty()) {
1478 BlocksInClonedLoop.insert(ClonedHeader);
1479
1480 while (!Worklist.empty()) {
1481 BasicBlock *BB = Worklist.pop_back_val();
1482 assert(BlocksInClonedLoop.count(BB) &&(static_cast <bool> (BlocksInClonedLoop.count(BB) &&
"Didn't put block into the loop set!") ? void (0) : __assert_fail
("BlocksInClonedLoop.count(BB) && \"Didn't put block into the loop set!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1483, __extension__
__PRETTY_FUNCTION__))
1483 "Didn't put block into the loop set!")(static_cast <bool> (BlocksInClonedLoop.count(BB) &&
"Didn't put block into the loop set!") ? void (0) : __assert_fail
("BlocksInClonedLoop.count(BB) && \"Didn't put block into the loop set!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1483, __extension__
__PRETTY_FUNCTION__))
;
1484
1485 // Insert any predecessors that are in the possible set into the cloned
1486 // set, and if the insert is successful, add them to the worklist. Note
1487 // that we filter on the blocks that are definitely reachable via the
1488 // backedge to the loop header so we may prune out dead code within the
1489 // cloned loop.
1490 for (auto *Pred : predecessors(BB))
1491 if (ClonedLoopBlocks.count(Pred) &&
1492 BlocksInClonedLoop.insert(Pred).second)
1493 Worklist.push_back(Pred);
1494 }
1495
1496 ClonedL = LI.AllocateLoop();
1497 if (ParentL) {
1498 ParentL->addBasicBlockToLoop(ClonedPH, LI);
1499 ParentL->addChildLoop(ClonedL);
1500 } else {
1501 LI.addTopLevelLoop(ClonedL);
1502 }
1503 NonChildClonedLoops.push_back(ClonedL);
1504
1505 ClonedL->reserveBlocks(BlocksInClonedLoop.size());
1506 // We don't want to just add the cloned loop blocks based on how we
1507 // discovered them. The original order of blocks was carefully built in
1508 // a way that doesn't rely on predecessor ordering. Rather than re-invent
1509 // that logic, we just re-walk the original blocks (and those of the child
1510 // loops) and filter them as we add them into the cloned loop.
1511 for (auto *BB : OrigL.blocks()) {
1512 auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));
1513 if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))
1514 continue;
1515
1516 // Directly add the blocks that are only in this loop.
1517 if (LI.getLoopFor(BB) == &OrigL) {
1518 ClonedL->addBasicBlockToLoop(ClonedBB, LI);
1519 continue;
1520 }
1521
1522 // We want to manually add it to this loop and parents.
1523 // Registering it with LoopInfo will happen when we clone the top
1524 // loop for this block.
1525 for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
1526 PL->addBlockEntry(ClonedBB);
1527 }
1528
1529 // Now add each child loop whose header remains within the cloned loop. All
1530 // of the blocks within the loop must satisfy the same constraints as the
1531 // header so once we pass the header checks we can just clone the entire
1532 // child loop nest.
1533 for (Loop *ChildL : OrigL) {
1534 auto *ClonedChildHeader =
1535 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1536 if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))
1537 continue;
1538
1539#ifndef NDEBUG
1540 // We should never have a cloned child loop header but fail to have
1541 // all of the blocks for that child loop.
1542 for (auto *ChildLoopBB : ChildL->blocks())
1543 assert(BlocksInClonedLoop.count((static_cast <bool> (BlocksInClonedLoop.count( cast<
BasicBlock>(VMap.lookup(ChildLoopBB))) && "Child cloned loop has a header within the cloned outer "
"loop but not all of its blocks!") ? void (0) : __assert_fail
("BlocksInClonedLoop.count( cast<BasicBlock>(VMap.lookup(ChildLoopBB))) && \"Child cloned loop has a header within the cloned outer \" \"loop but not all of its blocks!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1546, __extension__
__PRETTY_FUNCTION__))
1544 cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&(static_cast <bool> (BlocksInClonedLoop.count( cast<
BasicBlock>(VMap.lookup(ChildLoopBB))) && "Child cloned loop has a header within the cloned outer "
"loop but not all of its blocks!") ? void (0) : __assert_fail
("BlocksInClonedLoop.count( cast<BasicBlock>(VMap.lookup(ChildLoopBB))) && \"Child cloned loop has a header within the cloned outer \" \"loop but not all of its blocks!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1546, __extension__
__PRETTY_FUNCTION__))
1545 "Child cloned loop has a header within the cloned outer "(static_cast <bool> (BlocksInClonedLoop.count( cast<
BasicBlock>(VMap.lookup(ChildLoopBB))) && "Child cloned loop has a header within the cloned outer "
"loop but not all of its blocks!") ? void (0) : __assert_fail
("BlocksInClonedLoop.count( cast<BasicBlock>(VMap.lookup(ChildLoopBB))) && \"Child cloned loop has a header within the cloned outer \" \"loop but not all of its blocks!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1546, __extension__
__PRETTY_FUNCTION__))
1546 "loop but not all of its blocks!")(static_cast <bool> (BlocksInClonedLoop.count( cast<
BasicBlock>(VMap.lookup(ChildLoopBB))) && "Child cloned loop has a header within the cloned outer "
"loop but not all of its blocks!") ? void (0) : __assert_fail
("BlocksInClonedLoop.count( cast<BasicBlock>(VMap.lookup(ChildLoopBB))) && \"Child cloned loop has a header within the cloned outer \" \"loop but not all of its blocks!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1546, __extension__
__PRETTY_FUNCTION__))
;
1547#endif
1548
1549 cloneLoopNest(*ChildL, ClonedL, VMap, LI);
1550 }
1551 }
1552
1553 // Now that we've handled all the components of the original loop that were
1554 // cloned into a new loop, we still need to handle anything from the original
1555 // loop that wasn't in a cloned loop.
1556
1557 // Figure out what blocks are left to place within any loop nest containing
1558 // the unswitched loop. If we never formed a loop, the cloned PH is one of
1559 // them.
1560 SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;
1561 if (BlocksInClonedLoop.empty())
1562 UnloopedBlockSet.insert(ClonedPH);
1563 for (auto *ClonedBB : ClonedLoopBlocks)
1564 if (!BlocksInClonedLoop.count(ClonedBB))
1565 UnloopedBlockSet.insert(ClonedBB);
1566
1567 // Copy the cloned exits and sort them in ascending loop depth, we'll work
1568 // backwards across these to process them inside out. The order shouldn't
1569 // matter as we're just trying to build up the map from inside-out; we use
1570 // the map in a more stably ordered way below.
1571 auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1572 llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1573 return ExitLoopMap.lookup(LHS)->getLoopDepth() <
1574 ExitLoopMap.lookup(RHS)->getLoopDepth();
1575 });
1576
1577 // Populate the existing ExitLoopMap with everything reachable from each
1578 // exit, starting from the inner most exit.
1579 while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {
1580 assert(Worklist.empty() && "Didn't clear worklist!")(static_cast <bool> (Worklist.empty() && "Didn't clear worklist!"
) ? void (0) : __assert_fail ("Worklist.empty() && \"Didn't clear worklist!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1580, __extension__
__PRETTY_FUNCTION__))
;
1581
1582 BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
1583 Loop *ExitL = ExitLoopMap.lookup(ExitBB);
1584
1585 // Walk the CFG back until we hit the cloned PH adding everything reachable
1586 // and in the unlooped set to this exit block's loop.
1587 Worklist.push_back(ExitBB);
1588 do {
1589 BasicBlock *BB = Worklist.pop_back_val();
1590 // We can stop recursing at the cloned preheader (if we get there).
1591 if (BB == ClonedPH)
1592 continue;
1593
1594 for (BasicBlock *PredBB : predecessors(BB)) {
1595 // If this pred has already been moved to our set or is part of some
1596 // (inner) loop, no update needed.
1597 if (!UnloopedBlockSet.erase(PredBB)) {
1598 assert((static_cast <bool> ((BlocksInClonedLoop.count(PredBB) ||
ExitLoopMap.count(PredBB)) && "Predecessor not mapped to a loop!"
) ? void (0) : __assert_fail ("(BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) && \"Predecessor not mapped to a loop!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1600, __extension__
__PRETTY_FUNCTION__))
1599 (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&(static_cast <bool> ((BlocksInClonedLoop.count(PredBB) ||
ExitLoopMap.count(PredBB)) && "Predecessor not mapped to a loop!"
) ? void (0) : __assert_fail ("(BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) && \"Predecessor not mapped to a loop!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1600, __extension__
__PRETTY_FUNCTION__))
1600 "Predecessor not mapped to a loop!")(static_cast <bool> ((BlocksInClonedLoop.count(PredBB) ||
ExitLoopMap.count(PredBB)) && "Predecessor not mapped to a loop!"
) ? void (0) : __assert_fail ("(BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) && \"Predecessor not mapped to a loop!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1600, __extension__
__PRETTY_FUNCTION__))
;
1601 continue;
1602 }
1603
1604 // We just insert into the loop set here. We'll add these blocks to the
1605 // exit loop after we build up the set in an order that doesn't rely on
1606 // predecessor order (which in turn relies on use list order).
1607 bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;
1608 (void)Inserted;
1609 assert(Inserted && "Should only visit an unlooped block once!")(static_cast <bool> (Inserted && "Should only visit an unlooped block once!"
) ? void (0) : __assert_fail ("Inserted && \"Should only visit an unlooped block once!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1609, __extension__
__PRETTY_FUNCTION__))
;
1610
1611 // And recurse through to its predecessors.
1612 Worklist.push_back(PredBB);
1613 }
1614 } while (!Worklist.empty());
1615 }
1616
1617 // Now that the ExitLoopMap gives as mapping for all the non-looping cloned
1618 // blocks to their outer loops, walk the cloned blocks and the cloned exits
1619 // in their original order adding them to the correct loop.
1620
1621 // We need a stable insertion order. We use the order of the original loop
1622 // order and map into the correct parent loop.
1623 for (auto *BB : llvm::concat<BasicBlock *const>(
1624 ArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1625 if (Loop *OuterL = ExitLoopMap.lookup(BB))
1626 OuterL->addBasicBlockToLoop(BB, LI);
1627
1628#ifndef NDEBUG
1629 for (auto &BBAndL : ExitLoopMap) {
1630 auto *BB = BBAndL.first;
1631 auto *OuterL = BBAndL.second;
1632 assert(LI.getLoopFor(BB) == OuterL &&(static_cast <bool> (LI.getLoopFor(BB) == OuterL &&
"Failed to put all blocks into outer loops!") ? void (0) : __assert_fail
("LI.getLoopFor(BB) == OuterL && \"Failed to put all blocks into outer loops!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1633, __extension__
__PRETTY_FUNCTION__))
1633 "Failed to put all blocks into outer loops!")(static_cast <bool> (LI.getLoopFor(BB) == OuterL &&
"Failed to put all blocks into outer loops!") ? void (0) : __assert_fail
("LI.getLoopFor(BB) == OuterL && \"Failed to put all blocks into outer loops!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1633, __extension__
__PRETTY_FUNCTION__))
;
1634 }
1635#endif
1636
1637 // Now that all the blocks are placed into the correct containing loop in the
1638 // absence of child loops, find all the potentially cloned child loops and
1639 // clone them into whatever outer loop we placed their header into.
1640 for (Loop *ChildL : OrigL) {
1641 auto *ClonedChildHeader =
1642 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1643 if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))
1644 continue;
1645
1646#ifndef NDEBUG
1647 for (auto *ChildLoopBB : ChildL->blocks())
1648 assert(VMap.count(ChildLoopBB) &&(static_cast <bool> (VMap.count(ChildLoopBB) &&
"Cloned a child loop header but not all of that loops blocks!"
) ? void (0) : __assert_fail ("VMap.count(ChildLoopBB) && \"Cloned a child loop header but not all of that loops blocks!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1649, __extension__
__PRETTY_FUNCTION__))
1649 "Cloned a child loop header but not all of that loops blocks!")(static_cast <bool> (VMap.count(ChildLoopBB) &&
"Cloned a child loop header but not all of that loops blocks!"
) ? void (0) : __assert_fail ("VMap.count(ChildLoopBB) && \"Cloned a child loop header but not all of that loops blocks!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1649, __extension__
__PRETTY_FUNCTION__))
;
1650#endif
1651
1652 NonChildClonedLoops.push_back(cloneLoopNest(
1653 *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
1654 }
1655}
1656
1657static void
1658deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
1659 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
1660 DominatorTree &DT, MemorySSAUpdater *MSSAU) {
1661 // Find all the dead clones, and remove them from their successors.
1662 SmallVector<BasicBlock *, 16> DeadBlocks;
1663 for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
1664 for (const auto &VMap : VMaps)
1665 if (BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
1666 if (!DT.isReachableFromEntry(ClonedBB)) {
1667 for (BasicBlock *SuccBB : successors(ClonedBB))
1668 SuccBB->removePredecessor(ClonedBB);
1669 DeadBlocks.push_back(ClonedBB);
1670 }
1671
1672 // Remove all MemorySSA in the dead blocks
1673 if (MSSAU) {
1674 SmallSetVector<BasicBlock *, 8> DeadBlockSet(DeadBlocks.begin(),
1675 DeadBlocks.end());
1676 MSSAU->removeBlocks(DeadBlockSet);
1677 }
1678
1679 // Drop any remaining references to break cycles.
1680 for (BasicBlock *BB : DeadBlocks)
1681 BB->dropAllReferences();
1682 // Erase them from the IR.
1683 for (BasicBlock *BB : DeadBlocks)
1684 BB->eraseFromParent();
1685}
1686
1687static void
1688deleteDeadBlocksFromLoop(Loop &L,
1689 SmallVectorImpl<BasicBlock *> &ExitBlocks,
1690 DominatorTree &DT, LoopInfo &LI,
1691 MemorySSAUpdater *MSSAU,
1692 ScalarEvolution *SE,
1693 function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
1694 // Find all the dead blocks tied to this loop, and remove them from their
1695 // successors.
1696 SmallSetVector<BasicBlock *, 8> DeadBlockSet;
1697
1698 // Start with loop/exit blocks and get a transitive closure of reachable dead
1699 // blocks.
1700 SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(),
1701 ExitBlocks.end());
1702 DeathCandidates.append(L.blocks().begin(), L.blocks().end());
1703 while (!DeathCandidates.empty()) {
1704 auto *BB = DeathCandidates.pop_back_val();
1705 if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) {
1706 for (BasicBlock *SuccBB : successors(BB)) {
1707 SuccBB->removePredecessor(BB);
1708 DeathCandidates.push_back(SuccBB);
1709 }
1710 DeadBlockSet.insert(BB);
1711 }
1712 }
1713
1714 // Remove all MemorySSA in the dead blocks
1715 if (MSSAU)
1716 MSSAU->removeBlocks(DeadBlockSet);
1717
1718 // Filter out the dead blocks from the exit blocks list so that it can be
1719 // used in the caller.
1720 llvm::erase_if(ExitBlocks,
1721 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1722
1723 // Walk from this loop up through its parents removing all of the dead blocks.
1724 for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1725 for (auto *BB : DeadBlockSet)
1726 ParentL->getBlocksSet().erase(BB);
1727 llvm::erase_if(ParentL->getBlocksVector(),
1728 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1729 }
1730
1731 // Now delete the dead child loops. This raw delete will clear them
1732 // recursively.
1733 llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
1734 if (!DeadBlockSet.count(ChildL->getHeader()))
1735 return false;
1736
1737 assert(llvm::all_of(ChildL->blocks(),(static_cast <bool> (llvm::all_of(ChildL->blocks(), [
&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB
); }) && "If the child loop header is dead all blocks in the child loop must "
"be dead as well!") ? void (0) : __assert_fail ("llvm::all_of(ChildL->blocks(), [&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB); }) && \"If the child loop header is dead all blocks in the child loop must \" \"be dead as well!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1742, __extension__
__PRETTY_FUNCTION__))
1738 [&](BasicBlock *ChildBB) {(static_cast <bool> (llvm::all_of(ChildL->blocks(), [
&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB
); }) && "If the child loop header is dead all blocks in the child loop must "
"be dead as well!") ? void (0) : __assert_fail ("llvm::all_of(ChildL->blocks(), [&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB); }) && \"If the child loop header is dead all blocks in the child loop must \" \"be dead as well!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1742, __extension__
__PRETTY_FUNCTION__))
1739 return DeadBlockSet.count(ChildBB);(static_cast <bool> (llvm::all_of(ChildL->blocks(), [
&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB
); }) && "If the child loop header is dead all blocks in the child loop must "
"be dead as well!") ? void (0) : __assert_fail ("llvm::all_of(ChildL->blocks(), [&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB); }) && \"If the child loop header is dead all blocks in the child loop must \" \"be dead as well!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1742, __extension__
__PRETTY_FUNCTION__))
1740 }) &&(static_cast <bool> (llvm::all_of(ChildL->blocks(), [
&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB
); }) && "If the child loop header is dead all blocks in the child loop must "
"be dead as well!") ? void (0) : __assert_fail ("llvm::all_of(ChildL->blocks(), [&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB); }) && \"If the child loop header is dead all blocks in the child loop must \" \"be dead as well!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1742, __extension__
__PRETTY_FUNCTION__))
1741 "If the child loop header is dead all blocks in the child loop must "(static_cast <bool> (llvm::all_of(ChildL->blocks(), [
&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB
); }) && "If the child loop header is dead all blocks in the child loop must "
"be dead as well!") ? void (0) : __assert_fail ("llvm::all_of(ChildL->blocks(), [&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB); }) && \"If the child loop header is dead all blocks in the child loop must \" \"be dead as well!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1742, __extension__
__PRETTY_FUNCTION__))
1742 "be dead as well!")(static_cast <bool> (llvm::all_of(ChildL->blocks(), [
&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB
); }) && "If the child loop header is dead all blocks in the child loop must "
"be dead as well!") ? void (0) : __assert_fail ("llvm::all_of(ChildL->blocks(), [&](BasicBlock *ChildBB) { return DeadBlockSet.count(ChildBB); }) && \"If the child loop header is dead all blocks in the child loop must \" \"be dead as well!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1742, __extension__
__PRETTY_FUNCTION__))
;
1743 DestroyLoopCB(*ChildL, ChildL->getName());
1744 if (SE)
1745 SE->forgetBlockAndLoopDispositions();
1746 LI.destroy(ChildL);
1747 return true;
1748 });
1749
1750 // Remove the loop mappings for the dead blocks and drop all the references
1751 // from these blocks to others to handle cyclic references as we start
1752 // deleting the blocks themselves.
1753 for (auto *BB : DeadBlockSet) {
1754 // Check that the dominator tree has already been updated.
1755 assert(!DT.getNode(BB) && "Should already have cleared domtree!")(static_cast <bool> (!DT.getNode(BB) && "Should already have cleared domtree!"
) ? void (0) : __assert_fail ("!DT.getNode(BB) && \"Should already have cleared domtree!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1755, __extension__
__PRETTY_FUNCTION__))
;
1756 LI.changeLoopFor(BB, nullptr);
1757 // Drop all uses of the instructions to make sure we won't have dangling
1758 // uses in other blocks.
1759 for (auto &I : *BB)
1760 if (!I.use_empty())
1761 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
1762 BB->dropAllReferences();
1763 }
1764
1765 // Actually delete the blocks now that they've been fully unhooked from the
1766 // IR.
1767 for (auto *BB : DeadBlockSet)
1768 BB->eraseFromParent();
1769}
1770
1771/// Recompute the set of blocks in a loop after unswitching.
1772///
1773/// This walks from the original headers predecessors to rebuild the loop. We
1774/// take advantage of the fact that new blocks can't have been added, and so we
1775/// filter by the original loop's blocks. This also handles potentially
1776/// unreachable code that we don't want to explore but might be found examining
1777/// the predecessors of the header.
1778///
1779/// If the original loop is no longer a loop, this will return an empty set. If
1780/// it remains a loop, all the blocks within it will be added to the set
1781/// (including those blocks in inner loops).
1782static SmallPtrSet<const BasicBlock *, 16> recomputeLoopBlockSet(Loop &L,
1783 LoopInfo &LI) {
1784 SmallPtrSet<const BasicBlock *, 16> LoopBlockSet;
1785
1786 auto *PH = L.getLoopPreheader();
1787 auto *Header = L.getHeader();
1788
1789 // A worklist to use while walking backwards from the header.
1790 SmallVector<BasicBlock *, 16> Worklist;
1791
1792 // First walk the predecessors of the header to find the backedges. This will
1793 // form the basis of our walk.
1794 for (auto *Pred : predecessors(Header)) {
1795 // Skip the preheader.
1796 if (Pred == PH)
1797 continue;
1798
1799 // Because the loop was in simplified form, the only non-loop predecessor
1800 // is the preheader.
1801 assert(L.contains(Pred) && "Found a predecessor of the loop header other "(static_cast <bool> (L.contains(Pred) && "Found a predecessor of the loop header other "
"than the preheader that is not part of the " "loop!") ? void
(0) : __assert_fail ("L.contains(Pred) && \"Found a predecessor of the loop header other \" \"than the preheader that is not part of the \" \"loop!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1803, __extension__
__PRETTY_FUNCTION__))
1802 "than the preheader that is not part of the "(static_cast <bool> (L.contains(Pred) && "Found a predecessor of the loop header other "
"than the preheader that is not part of the " "loop!") ? void
(0) : __assert_fail ("L.contains(Pred) && \"Found a predecessor of the loop header other \" \"than the preheader that is not part of the \" \"loop!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1803, __extension__
__PRETTY_FUNCTION__))
1803 "loop!")(static_cast <bool> (L.contains(Pred) && "Found a predecessor of the loop header other "
"than the preheader that is not part of the " "loop!") ? void
(0) : __assert_fail ("L.contains(Pred) && \"Found a predecessor of the loop header other \" \"than the preheader that is not part of the \" \"loop!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1803, __extension__
__PRETTY_FUNCTION__))
;
1804
1805 // Insert this block into the loop set and on the first visit and, if it
1806 // isn't the header we're currently walking, put it into the worklist to
1807 // recurse through.
1808 if (LoopBlockSet.insert(Pred).second && Pred != Header)
1809 Worklist.push_back(Pred);
1810 }
1811
1812 // If no backedges were found, we're done.
1813 if (LoopBlockSet.empty())
1814 return LoopBlockSet;
1815
1816 // We found backedges, recurse through them to identify the loop blocks.
1817 while (!Worklist.empty()) {
1818 BasicBlock *BB = Worklist.pop_back_val();
1819 assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!")(static_cast <bool> (LoopBlockSet.count(BB) && "Didn't put block into the loop set!"
) ? void (0) : __assert_fail ("LoopBlockSet.count(BB) && \"Didn't put block into the loop set!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1819, __extension__
__PRETTY_FUNCTION__))
;
1820
1821 // No need to walk past the header.
1822 if (BB == Header)
1823 continue;
1824
1825 // Because we know the inner loop structure remains valid we can use the
1826 // loop structure to jump immediately across the entire nested loop.
1827 // Further, because it is in loop simplified form, we can directly jump
1828 // to its preheader afterward.
1829 if (Loop *InnerL = LI.getLoopFor(BB))
1830 if (InnerL != &L) {
1831 assert(L.contains(InnerL) &&(static_cast <bool> (L.contains(InnerL) && "Should not reach a loop *outside* this loop!"
) ? void (0) : __assert_fail ("L.contains(InnerL) && \"Should not reach a loop *outside* this loop!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1832, __extension__
__PRETTY_FUNCTION__))
1832 "Should not reach a loop *outside* this loop!")(static_cast <bool> (L.contains(InnerL) && "Should not reach a loop *outside* this loop!"
) ? void (0) : __assert_fail ("L.contains(InnerL) && \"Should not reach a loop *outside* this loop!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1832, __extension__
__PRETTY_FUNCTION__))
;
1833 // The preheader is the only possible predecessor of the loop so
1834 // insert it into the set and check whether it was already handled.
1835 auto *InnerPH = InnerL->getLoopPreheader();
1836 assert(L.contains(InnerPH) && "Cannot contain an inner loop block "(static_cast <bool> (L.contains(InnerPH) && "Cannot contain an inner loop block "
"but not contain the inner loop " "preheader!") ? void (0) :
__assert_fail ("L.contains(InnerPH) && \"Cannot contain an inner loop block \" \"but not contain the inner loop \" \"preheader!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1838, __extension__
__PRETTY_FUNCTION__))
1837 "but not contain the inner loop "(static_cast <bool> (L.contains(InnerPH) && "Cannot contain an inner loop block "
"but not contain the inner loop " "preheader!") ? void (0) :
__assert_fail ("L.contains(InnerPH) && \"Cannot contain an inner loop block \" \"but not contain the inner loop \" \"preheader!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1838, __extension__
__PRETTY_FUNCTION__))
1838 "preheader!")(static_cast <bool> (L.contains(InnerPH) && "Cannot contain an inner loop block "
"but not contain the inner loop " "preheader!") ? void (0) :
__assert_fail ("L.contains(InnerPH) && \"Cannot contain an inner loop block \" \"but not contain the inner loop \" \"preheader!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1838, __extension__
__PRETTY_FUNCTION__))
;
1839 if (!LoopBlockSet.insert(InnerPH).second)
1840 // The only way to reach the preheader is through the loop body
1841 // itself so if it has been visited the loop is already handled.
1842 continue;
1843
1844 // Insert all of the blocks (other than those already present) into
1845 // the loop set. We expect at least the block that led us to find the
1846 // inner loop to be in the block set, but we may also have other loop
1847 // blocks if they were already enqueued as predecessors of some other
1848 // outer loop block.
1849 for (auto *InnerBB : InnerL->blocks()) {
1850 if (InnerBB == BB) {
1851 assert(LoopBlockSet.count(InnerBB) &&(static_cast <bool> (LoopBlockSet.count(InnerBB) &&
"Block should already be in the set!") ? void (0) : __assert_fail
("LoopBlockSet.count(InnerBB) && \"Block should already be in the set!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1852, __extension__
__PRETTY_FUNCTION__))
1852 "Block should already be in the set!")(static_cast <bool> (LoopBlockSet.count(InnerBB) &&
"Block should already be in the set!") ? void (0) : __assert_fail
("LoopBlockSet.count(InnerBB) && \"Block should already be in the set!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1852, __extension__
__PRETTY_FUNCTION__))
;
1853 continue;
1854 }
1855
1856 LoopBlockSet.insert(InnerBB);
1857 }
1858
1859 // Add the preheader to the worklist so we will continue past the
1860 // loop body.
1861 Worklist.push_back(InnerPH);
1862 continue;
1863 }
1864
1865 // Insert any predecessors that were in the original loop into the new
1866 // set, and if the insert is successful, add them to the worklist.
1867 for (auto *Pred : predecessors(BB))
1868 if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)
1869 Worklist.push_back(Pred);
1870 }
1871
1872 assert(LoopBlockSet.count(Header) && "Cannot fail to add the header!")(static_cast <bool> (LoopBlockSet.count(Header) &&
"Cannot fail to add the header!") ? void (0) : __assert_fail
("LoopBlockSet.count(Header) && \"Cannot fail to add the header!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1872, __extension__
__PRETTY_FUNCTION__))
;
1873
1874 // We've found all the blocks participating in the loop, return our completed
1875 // set.
1876 return LoopBlockSet;
1877}
1878
1879/// Rebuild a loop after unswitching removes some subset of blocks and edges.
1880///
1881/// The removal may have removed some child loops entirely but cannot have
1882/// disturbed any remaining child loops. However, they may need to be hoisted
1883/// to the parent loop (or to be top-level loops). The original loop may be
1884/// completely removed.
1885///
1886/// The sibling loops resulting from this update are returned. If the original
1887/// loop remains a valid loop, it will be the first entry in this list with all
1888/// of the newly sibling loops following it.
1889///
1890/// Returns true if the loop remains a loop after unswitching, and false if it
1891/// is no longer a loop after unswitching (and should not continue to be
1892/// referenced).
1893static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
1894 LoopInfo &LI,
1895 SmallVectorImpl<Loop *> &HoistedLoops,
1896 ScalarEvolution *SE) {
1897 auto *PH = L.getLoopPreheader();
1898
1899 // Compute the actual parent loop from the exit blocks. Because we may have
1900 // pruned some exits the loop may be different from the original parent.
1901 Loop *ParentL = nullptr;
1902 SmallVector<Loop *, 4> ExitLoops;
1903 SmallVector<BasicBlock *, 4> ExitsInLoops;
1904 ExitsInLoops.reserve(ExitBlocks.size());
1905 for (auto *ExitBB : ExitBlocks)
1906 if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1907 ExitLoops.push_back(ExitL);
1908 ExitsInLoops.push_back(ExitBB);
1909 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1910 ParentL = ExitL;
1911 }
1912
1913 // Recompute the blocks participating in this loop. This may be empty if it
1914 // is no longer a loop.
1915 auto LoopBlockSet = recomputeLoopBlockSet(L, LI);
1916
1917 // If we still have a loop, we need to re-set the loop's parent as the exit
1918 // block set changing may have moved it within the loop nest. Note that this
1919 // can only happen when this loop has a parent as it can only hoist the loop
1920 // *up* the nest.
1921 if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
1922 // Remove this loop's (original) blocks from all of the intervening loops.
1923 for (Loop *IL = L.getParentLoop(); IL != ParentL;
1924 IL = IL->getParentLoop()) {
1925 IL->getBlocksSet().erase(PH);
1926 for (auto *BB : L.blocks())
1927 IL->getBlocksSet().erase(BB);
1928 llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {
1929 return BB == PH || L.contains(BB);
1930 });
1931 }
1932
1933 LI.changeLoopFor(PH, ParentL);
1934 L.getParentLoop()->removeChildLoop(&L);
1935 if (ParentL)
1936 ParentL->addChildLoop(&L);
1937 else
1938 LI.addTopLevelLoop(&L);
1939 }
1940
1941 // Now we update all the blocks which are no longer within the loop.
1942 auto &Blocks = L.getBlocksVector();
1943 auto BlocksSplitI =
1944 LoopBlockSet.empty()
1945 ? Blocks.begin()
1946 : std::stable_partition(
1947 Blocks.begin(), Blocks.end(),
1948 [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });
1949
1950 // Before we erase the list of unlooped blocks, build a set of them.
1951 SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end());
1952 if (LoopBlockSet.empty())
1953 UnloopedBlocks.insert(PH);
1954
1955 // Now erase these blocks from the loop.
1956 for (auto *BB : make_range(BlocksSplitI, Blocks.end()))
1957 L.getBlocksSet().erase(BB);
1958 Blocks.erase(BlocksSplitI, Blocks.end());
1959
1960 // Sort the exits in ascending loop depth, we'll work backwards across these
1961 // to process them inside out.
1962 llvm::stable_sort(ExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1963 return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
1964 });
1965
1966 // We'll build up a set for each exit loop.
1967 SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
1968 Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop.
1969
1970 auto RemoveUnloopedBlocksFromLoop =
1971 [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {
1972 for (auto *BB : UnloopedBlocks)
1973 L.getBlocksSet().erase(BB);
1974 llvm::erase_if(L.getBlocksVector(), [&](BasicBlock *BB) {
1975 return UnloopedBlocks.count(BB);
1976 });
1977 };
1978
1979 SmallVector<BasicBlock *, 16> Worklist;
1980 while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {
1981 assert(Worklist.empty() && "Didn't clear worklist!")(static_cast <bool> (Worklist.empty() && "Didn't clear worklist!"
) ? void (0) : __assert_fail ("Worklist.empty() && \"Didn't clear worklist!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1981, __extension__
__PRETTY_FUNCTION__))
;
1982 assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!")(static_cast <bool> (NewExitLoopBlocks.empty() &&
"Didn't clear loop set!") ? void (0) : __assert_fail ("NewExitLoopBlocks.empty() && \"Didn't clear loop set!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1982, __extension__
__PRETTY_FUNCTION__))
;
1983
1984 // Grab the next exit block, in decreasing loop depth order.
1985 BasicBlock *ExitBB = ExitsInLoops.pop_back_val();
1986 Loop &ExitL = *LI.getLoopFor(ExitBB);
1987 assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!")(static_cast <bool> (ExitL.contains(&L) && "Exit loop must contain the inner loop!"
) ? void (0) : __assert_fail ("ExitL.contains(&L) && \"Exit loop must contain the inner loop!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 1987, __extension__
__PRETTY_FUNCTION__))
;
1988
1989 // Erase all of the unlooped blocks from the loops between the previous
1990 // exit loop and this exit loop. This works because the ExitInLoops list is
1991 // sorted in increasing order of loop depth and thus we visit loops in
1992 // decreasing order of loop depth.
1993 for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())
1994 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1995
1996 // Walk the CFG back until we hit the cloned PH adding everything reachable
1997 // and in the unlooped set to this exit block's loop.
1998 Worklist.push_back(ExitBB);
1999 do {
2000 BasicBlock *BB = Worklist.pop_back_val();
2001 // We can stop recursing at the cloned preheader (if we get there).
2002 if (BB == PH)
2003 continue;
2004
2005 for (BasicBlock *PredBB : predecessors(BB)) {
2006 // If this pred has already been moved to our set or is part of some
2007 // (inner) loop, no update needed.
2008 if (!UnloopedBlocks.erase(PredBB)) {
2009 assert((NewExitLoopBlocks.count(PredBB) ||(static_cast <bool> ((NewExitLoopBlocks.count(PredBB) ||
ExitL.contains(LI.getLoopFor(PredBB))) && "Predecessor not in a nested loop (or already visited)!"
) ? void (0) : __assert_fail ("(NewExitLoopBlocks.count(PredBB) || ExitL.contains(LI.getLoopFor(PredBB))) && \"Predecessor not in a nested loop (or already visited)!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2011, __extension__
__PRETTY_FUNCTION__))
2010 ExitL.contains(LI.getLoopFor(PredBB))) &&(static_cast <bool> ((NewExitLoopBlocks.count(PredBB) ||
ExitL.contains(LI.getLoopFor(PredBB))) && "Predecessor not in a nested loop (or already visited)!"
) ? void (0) : __assert_fail ("(NewExitLoopBlocks.count(PredBB) || ExitL.contains(LI.getLoopFor(PredBB))) && \"Predecessor not in a nested loop (or already visited)!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2011, __extension__
__PRETTY_FUNCTION__))
2011 "Predecessor not in a nested loop (or already visited)!")(static_cast <bool> ((NewExitLoopBlocks.count(PredBB) ||
ExitL.contains(LI.getLoopFor(PredBB))) && "Predecessor not in a nested loop (or already visited)!"
) ? void (0) : __assert_fail ("(NewExitLoopBlocks.count(PredBB) || ExitL.contains(LI.getLoopFor(PredBB))) && \"Predecessor not in a nested loop (or already visited)!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2011, __extension__
__PRETTY_FUNCTION__))
;
2012 continue;
2013 }
2014
2015 // We just insert into the loop set here. We'll add these blocks to the
2016 // exit loop after we build up the set in a deterministic order rather
2017 // than the predecessor-influenced visit order.
2018 bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
2019 (void)Inserted;
2020 assert(Inserted && "Should only visit an unlooped block once!")(static_cast <bool> (Inserted && "Should only visit an unlooped block once!"
) ? void (0) : __assert_fail ("Inserted && \"Should only visit an unlooped block once!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2020, __extension__
__PRETTY_FUNCTION__))
;
2021
2022 // And recurse through to its predecessors.
2023 Worklist.push_back(PredBB);
2024 }
2025 } while (!Worklist.empty());
2026
2027 // If blocks in this exit loop were directly part of the original loop (as
2028 // opposed to a child loop) update the map to point to this exit loop. This
2029 // just updates a map and so the fact that the order is unstable is fine.
2030 for (auto *BB : NewExitLoopBlocks)
2031 if (Loop *BBL = LI.getLoopFor(BB))
2032 if (BBL == &L || !L.contains(BBL))
2033 LI.changeLoopFor(BB, &ExitL);
2034
2035 // We will remove the remaining unlooped blocks from this loop in the next
2036 // iteration or below.
2037 NewExitLoopBlocks.clear();
2038 }
2039
2040 // Any remaining unlooped blocks are no longer part of any loop unless they
2041 // are part of some child loop.
2042 for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())
2043 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2044 for (auto *BB : UnloopedBlocks)
2045 if (Loop *BBL = LI.getLoopFor(BB))
2046 if (BBL == &L || !L.contains(BBL))
2047 LI.changeLoopFor(BB, nullptr);
2048
2049 // Sink all the child loops whose headers are no longer in the loop set to
2050 // the parent (or to be top level loops). We reach into the loop and directly
2051 // update its subloop vector to make this batch update efficient.
2052 auto &SubLoops = L.getSubLoopsVector();
2053 auto SubLoopsSplitI =
2054 LoopBlockSet.empty()
2055 ? SubLoops.begin()
2056 : std::stable_partition(
2057 SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {
2058 return LoopBlockSet.count(SubL->getHeader());
2059 });
2060 for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) {
2061 HoistedLoops.push_back(HoistedL);
2062 HoistedL->setParentLoop(nullptr);
2063
2064 // To compute the new parent of this hoisted loop we look at where we
2065 // placed the preheader above. We can't lookup the header itself because we
2066 // retained the mapping from the header to the hoisted loop. But the
2067 // preheader and header should have the exact same new parent computed
2068 // based on the set of exit blocks from the original loop as the preheader
2069 // is a predecessor of the header and so reached in the reverse walk. And
2070 // because the loops were all in simplified form the preheader of the
2071 // hoisted loop can't be part of some *other* loop.
2072 if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))
2073 NewParentL->addChildLoop(HoistedL);
2074 else
2075 LI.addTopLevelLoop(HoistedL);
2076 }
2077 SubLoops.erase(SubLoopsSplitI, SubLoops.end());
2078
2079 // Actually delete the loop if nothing remained within it.
2080 if (Blocks.empty()) {
2081 assert(SubLoops.empty() &&(static_cast <bool> (SubLoops.empty() && "Failed to remove all subloops from the original loop!"
) ? void (0) : __assert_fail ("SubLoops.empty() && \"Failed to remove all subloops from the original loop!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2082, __extension__
__PRETTY_FUNCTION__))
2082 "Failed to remove all subloops from the original loop!")(static_cast <bool> (SubLoops.empty() && "Failed to remove all subloops from the original loop!"
) ? void (0) : __assert_fail ("SubLoops.empty() && \"Failed to remove all subloops from the original loop!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2082, __extension__
__PRETTY_FUNCTION__))
;
2083 if (Loop *ParentL = L.getParentLoop())
2084 ParentL->removeChildLoop(llvm::find(*ParentL, &L));
2085 else
2086 LI.removeLoop(llvm::find(LI, &L));
2087 // markLoopAsDeleted for L should be triggered by the caller (it is typically
2088 // done by using the UnswitchCB callback).
2089 if (SE)
2090 SE->forgetBlockAndLoopDispositions();
2091 LI.destroy(&L);
2092 return false;
2093 }
2094
2095 return true;
2096}
2097
2098/// Helper to visit a dominator subtree, invoking a callable on each node.
2099///
2100/// Returning false at any point will stop walking past that node of the tree.
2101template <typename CallableT>
2102void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
2103 SmallVector<DomTreeNode *, 4> DomWorklist;
2104 DomWorklist.push_back(DT[BB]);
2105#ifndef NDEBUG
2106 SmallPtrSet<DomTreeNode *, 4> Visited;
2107 Visited.insert(DT[BB]);
2108#endif
2109 do {
2110 DomTreeNode *N = DomWorklist.pop_back_val();
2111
2112 // Visit this node.
2113 if (!Callable(N->getBlock()))
2114 continue;
2115
2116 // Accumulate the child nodes.
2117 for (DomTreeNode *ChildN : *N) {
2118 assert(Visited.insert(ChildN).second &&(static_cast <bool> (Visited.insert(ChildN).second &&
"Cannot visit a node twice when walking a tree!") ? void (0)
: __assert_fail ("Visited.insert(ChildN).second && \"Cannot visit a node twice when walking a tree!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2119, __extension__
__PRETTY_FUNCTION__))
2119 "Cannot visit a node twice when walking a tree!")(static_cast <bool> (Visited.insert(ChildN).second &&
"Cannot visit a node twice when walking a tree!") ? void (0)
: __assert_fail ("Visited.insert(ChildN).second && \"Cannot visit a node twice when walking a tree!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2119, __extension__
__PRETTY_FUNCTION__))
;
2120 DomWorklist.push_back(ChildN);
2121 }
2122 } while (!DomWorklist.empty());
2123}
2124
2125static void unswitchNontrivialInvariants(
2126 Loop &L, Instruction &TI, ArrayRef<Value *> Invariants,
2127 IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI,
2128 AssumptionCache &AC,
2129 function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
2130 ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
2131 function_ref<void(Loop &, StringRef)> DestroyLoopCB, bool InsertFreeze) {
2132 auto *ParentBB = TI.getParent();
2133 BranchInst *BI = dyn_cast<BranchInst>(&TI);
2134 SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
2135
2136 // We can only unswitch switches, conditional branches with an invariant
2137 // condition, or combining invariant conditions with an instruction or
2138 // partially invariant instructions.
2139 assert((SI || (BI && BI->isConditional())) &&(static_cast <bool> ((SI || (BI && BI->isConditional
())) && "Can only unswitch switches and conditional branch!"
) ? void (0) : __assert_fail ("(SI || (BI && BI->isConditional())) && \"Can only unswitch switches and conditional branch!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2140, __extension__
__PRETTY_FUNCTION__))
2140 "Can only unswitch switches and conditional branch!")(static_cast <bool> ((SI || (BI && BI->isConditional
())) && "Can only unswitch switches and conditional branch!"
) ? void (0) : __assert_fail ("(SI || (BI && BI->isConditional())) && \"Can only unswitch switches and conditional branch!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2140, __extension__
__PRETTY_FUNCTION__))
;
2141 bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty();
2142 bool FullUnswitch =
2143 SI || (skipTrivialSelect(BI->getCondition()) == Invariants[0] &&
2144 !PartiallyInvariant);
2145 if (FullUnswitch)
2146 assert(Invariants.size() == 1 &&(static_cast <bool> (Invariants.size() == 1 && "Cannot have other invariants with full unswitching!"
) ? void (0) : __assert_fail ("Invariants.size() == 1 && \"Cannot have other invariants with full unswitching!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2147, __extension__
__PRETTY_FUNCTION__))
2147 "Cannot have other invariants with full unswitching!")(static_cast <bool> (Invariants.size() == 1 && "Cannot have other invariants with full unswitching!"
) ? void (0) : __assert_fail ("Invariants.size() == 1 && \"Cannot have other invariants with full unswitching!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2147, __extension__
__PRETTY_FUNCTION__))
;
2148 else
2149 assert(isa<Instruction>(skipTrivialSelect(BI->getCondition())) &&(static_cast <bool> (isa<Instruction>(skipTrivialSelect
(BI->getCondition())) && "Partial unswitching requires an instruction as the condition!"
) ? void (0) : __assert_fail ("isa<Instruction>(skipTrivialSelect(BI->getCondition())) && \"Partial unswitching requires an instruction as the condition!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2150, __extension__
__PRETTY_FUNCTION__))
2150 "Partial unswitching requires an instruction as the condition!")(static_cast <bool> (isa<Instruction>(skipTrivialSelect
(BI->getCondition())) && "Partial unswitching requires an instruction as the condition!"
) ? void (0) : __assert_fail ("isa<Instruction>(skipTrivialSelect(BI->getCondition())) && \"Partial unswitching requires an instruction as the condition!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2150, __extension__
__PRETTY_FUNCTION__))
;
2151
2152 if (MSSAU && VerifyMemorySSA)
2153 MSSAU->getMemorySSA()->verifyMemorySSA();
2154
2155 // Constant and BBs tracking the cloned and continuing successor. When we are
2156 // unswitching the entire condition, this can just be trivially chosen to
2157 // unswitch towards `true`. However, when we are unswitching a set of
2158 // invariants combined with `and` or `or` or partially invariant instructions,
2159 // the combining operation determines the best direction to unswitch: we want
2160 // to unswitch the direction that will collapse the branch.
2161 bool Direction = true;
2162 int ClonedSucc = 0;
2163 if (!FullUnswitch) {
2164 Value *Cond = skipTrivialSelect(BI->getCondition());
2165 (void)Cond;
2166 assert(((match(Cond, m_LogicalAnd()) ^ match(Cond, m_LogicalOr())) ||(static_cast <bool> (((match(Cond, m_LogicalAnd()) ^ match
(Cond, m_LogicalOr())) || PartiallyInvariant) && "Only `or`, `and`, an `select`, partially invariant instructions "
"can combine invariants being unswitched.") ? void (0) : __assert_fail
("((match(Cond, m_LogicalAnd()) ^ match(Cond, m_LogicalOr())) || PartiallyInvariant) && \"Only `or`, `and`, an `select`, partially invariant instructions \" \"can combine invariants being unswitched.\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2169, __extension__
__PRETTY_FUNCTION__))
2167 PartiallyInvariant) &&(static_cast <bool> (((match(Cond, m_LogicalAnd()) ^ match
(Cond, m_LogicalOr())) || PartiallyInvariant) && "Only `or`, `and`, an `select`, partially invariant instructions "
"can combine invariants being unswitched.") ? void (0) : __assert_fail
("((match(Cond, m_LogicalAnd()) ^ match(Cond, m_LogicalOr())) || PartiallyInvariant) && \"Only `or`, `and`, an `select`, partially invariant instructions \" \"can combine invariants being unswitched.\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2169, __extension__
__PRETTY_FUNCTION__))
2168 "Only `or`, `and`, an `select`, partially invariant instructions "(static_cast <bool> (((match(Cond, m_LogicalAnd()) ^ match
(Cond, m_LogicalOr())) || PartiallyInvariant) && "Only `or`, `and`, an `select`, partially invariant instructions "
"can combine invariants being unswitched.") ? void (0) : __assert_fail
("((match(Cond, m_LogicalAnd()) ^ match(Cond, m_LogicalOr())) || PartiallyInvariant) && \"Only `or`, `and`, an `select`, partially invariant instructions \" \"can combine invariants being unswitched.\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2169, __extension__
__PRETTY_FUNCTION__))
2169 "can combine invariants being unswitched.")(static_cast <bool> (((match(Cond, m_LogicalAnd()) ^ match
(Cond, m_LogicalOr())) || PartiallyInvariant) && "Only `or`, `and`, an `select`, partially invariant instructions "
"can combine invariants being unswitched.") ? void (0) : __assert_fail
("((match(Cond, m_LogicalAnd()) ^ match(Cond, m_LogicalOr())) || PartiallyInvariant) && \"Only `or`, `and`, an `select`, partially invariant instructions \" \"can combine invariants being unswitched.\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2169, __extension__
__PRETTY_FUNCTION__))
;
2170 if (!match(Cond, m_LogicalOr())) {
2171 if (match(Cond, m_LogicalAnd()) ||
2172 (PartiallyInvariant && !PartialIVInfo.KnownValue->isOneValue())) {
2173 Direction = false;
2174 ClonedSucc = 1;
2175 }
2176 }
2177 }
2178
2179 BasicBlock *RetainedSuccBB =
2180 BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
2181 SmallSetVector<BasicBlock *, 4> UnswitchedSuccBBs;
2182 if (BI)
2183 UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc));
2184 else
2185 for (auto Case : SI->cases())
2186 if (Case.getCaseSuccessor() != RetainedSuccBB)
2187 UnswitchedSuccBBs.insert(Case.getCaseSuccessor());
2188
2189 assert(!UnswitchedSuccBBs.count(RetainedSuccBB) &&(static_cast <bool> (!UnswitchedSuccBBs.count(RetainedSuccBB
) && "Should not unswitch the same successor we are retaining!"
) ? void (0) : __assert_fail ("!UnswitchedSuccBBs.count(RetainedSuccBB) && \"Should not unswitch the same successor we are retaining!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2190, __extension__
__PRETTY_FUNCTION__))
2190 "Should not unswitch the same successor we are retaining!")(static_cast <bool> (!UnswitchedSuccBBs.count(RetainedSuccBB
) && "Should not unswitch the same successor we are retaining!"
) ? void (0) : __assert_fail ("!UnswitchedSuccBBs.count(RetainedSuccBB) && \"Should not unswitch the same successor we are retaining!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2190, __extension__
__PRETTY_FUNCTION__))
;
2191
2192 // The branch should be in this exact loop. Any inner loop's invariant branch
2193 // should be handled by unswitching that inner loop. The caller of this
2194 // routine should filter out any candidates that remain (but were skipped for
2195 // whatever reason).
2196 assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!")(static_cast <bool> (LI.getLoopFor(ParentBB) == &L &&
"Branch in an inner loop!") ? void (0) : __assert_fail ("LI.getLoopFor(ParentBB) == &L && \"Branch in an inner loop!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2196, __extension__
__PRETTY_FUNCTION__))
;
2197
2198 // Compute the parent loop now before we start hacking on things.
2199 Loop *ParentL = L.getParentLoop();
2200 // Get blocks in RPO order for MSSA update, before changing the CFG.
2201 LoopBlocksRPO LBRPO(&L);
2202 if (MSSAU)
2203 LBRPO.perform(&LI);
2204
2205 // Compute the outer-most loop containing one of our exit blocks. This is the
2206 // furthest up our loopnest which can be mutated, which we will use below to
2207 // update things.
2208 Loop *OuterExitL = &L;
2209 SmallVector<BasicBlock *, 4> ExitBlocks;
2210 L.getUniqueExitBlocks(ExitBlocks);
2211 for (auto *ExitBB : ExitBlocks) {
2212 // ExitBB can be an exit block for several levels in the loop nest. Make
2213 // sure we find the top most.
2214 Loop *NewOuterExitL = getTopMostExitingLoop(ExitBB, LI);
2215 if (!NewOuterExitL) {
2216 // We exited the entire nest with this block, so we're done.
2217 OuterExitL = nullptr;
2218 break;
2219 }
2220 if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
2221 OuterExitL = NewOuterExitL;
2222 }
2223
2224 // At this point, we're definitely going to unswitch something so invalidate
2225 // any cached information in ScalarEvolution for the outer most loop
2226 // containing an exit block and all nested loops.
2227 if (SE) {
2228 if (OuterExitL)
2229 SE->forgetLoop(OuterExitL);
2230 else
2231 SE->forgetTopmostLoop(&L);
2232 SE->forgetBlockAndLoopDispositions();
2233 }
2234
2235 // If the edge from this terminator to a successor dominates that successor,
2236 // store a map from each block in its dominator subtree to it. This lets us
2237 // tell when cloning for a particular successor if a block is dominated by
2238 // some *other* successor with a single data structure. We use this to
2239 // significantly reduce cloning.
2240 SmallDenseMap<BasicBlock *, BasicBlock *, 16> DominatingSucc;
2241 for (auto *SuccBB : llvm::concat<BasicBlock *const>(ArrayRef(RetainedSuccBB),
2242 UnswitchedSuccBBs))
2243 if (SuccBB->getUniquePredecessor() ||
2244 llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2245 return PredBB == ParentBB || DT.dominates(SuccBB, PredBB);
2246 }))
2247 visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) {
2248 DominatingSucc[BB] = SuccBB;
2249 return true;
2250 });
2251
2252 // Split the preheader, so that we know that there is a safe place to insert
2253 // the conditional branch. We will change the preheader to have a conditional
2254 // branch on LoopCond. The original preheader will become the split point
2255 // between the unswitched versions, and we will have a new preheader for the
2256 // original loop.
2257 BasicBlock *SplitBB = L.getLoopPreheader();
2258 BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU);
2259
2260 // Keep track of the dominator tree updates needed.
2261 SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
2262
2263 // Clone the loop for each unswitched successor.
2264 SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
2265 VMaps.reserve(UnswitchedSuccBBs.size());
2266 SmallDenseMap<BasicBlock *, BasicBlock *, 4> ClonedPHs;
2267 for (auto *SuccBB : UnswitchedSuccBBs) {
2268 VMaps.emplace_back(new ValueToValueMapTy());
2269 ClonedPHs[SuccBB] = buildClonedLoopBlocks(
2270 L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
2271 DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU, SE);
2272 }
2273
2274 // Drop metadata if we may break its semantics by moving this instr into the
2275 // split block.
2276 if (TI.getMetadata(LLVMContext::MD_make_implicit)) {
2277 if (DropNonTrivialImplicitNullChecks)
2278 // Do not spend time trying to understand if we can keep it, just drop it
2279 // to save compile time.
2280 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2281 else {
2282 // It is only legal to preserve make.implicit metadata if we are
2283 // guaranteed no reach implicit null check after following this branch.
2284 ICFLoopSafetyInfo SafetyInfo;
2285 SafetyInfo.computeLoopSafetyInfo(&L);
2286 if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
2287 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2288 }
2289 }
2290
2291 // The stitching of the branched code back together depends on whether we're
2292 // doing full unswitching or not with the exception that we always want to
2293 // nuke the initial terminator placed in the split block.
2294 SplitBB->getTerminator()->eraseFromParent();
2295 if (FullUnswitch) {
2296 // Splice the terminator from the original loop and rewrite its
2297 // successors.
2298 SplitBB->splice(SplitBB->end(), ParentBB, TI.getIterator());
2299
2300 // Keep a clone of the terminator for MSSA updates.
2301 Instruction *NewTI = TI.clone();
2302 NewTI->insertInto(ParentBB, ParentBB->end());
2303
2304 // First wire up the moved terminator to the preheaders.
2305 if (BI) {
2306 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2307 BI->setSuccessor(ClonedSucc, ClonedPH);
2308 BI->setSuccessor(1 - ClonedSucc, LoopPH);
2309 Value *Cond = skipTrivialSelect(BI->getCondition());
2310 if (InsertFreeze)
2311 Cond = new FreezeInst(
2312 Cond, Cond->getName() + ".fr", BI);
2313 BI->setCondition(Cond);
2314 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2315 } else {
2316 assert(SI && "Must either be a branch or switch!")(static_cast <bool> (SI && "Must either be a branch or switch!"
) ? void (0) : __assert_fail ("SI && \"Must either be a branch or switch!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2316, __extension__
__PRETTY_FUNCTION__))
;
2317
2318 // Walk the cases and directly update their successors.
2319 assert(SI->getDefaultDest() == RetainedSuccBB &&(static_cast <bool> (SI->getDefaultDest() == RetainedSuccBB
&& "Not retaining default successor!") ? void (0) : __assert_fail
("SI->getDefaultDest() == RetainedSuccBB && \"Not retaining default successor!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2320, __extension__
__PRETTY_FUNCTION__))
2320 "Not retaining default successor!")(static_cast <bool> (SI->getDefaultDest() == RetainedSuccBB
&& "Not retaining default successor!") ? void (0) : __assert_fail
("SI->getDefaultDest() == RetainedSuccBB && \"Not retaining default successor!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2320, __extension__
__PRETTY_FUNCTION__))
;
2321 SI->setDefaultDest(LoopPH);
2322 for (const auto &Case : SI->cases())
2323 if (Case.getCaseSuccessor() == RetainedSuccBB)
2324 Case.setSuccessor(LoopPH);
2325 else
2326 Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
2327
2328 if (InsertFreeze)
2329 SI->setCondition(new FreezeInst(
2330 SI->getCondition(), SI->getCondition()->getName() + ".fr", SI));
2331
2332 // We need to use the set to populate domtree updates as even when there
2333 // are multiple cases pointing at the same successor we only want to
2334 // remove and insert one edge in the domtree.
2335 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2336 DTUpdates.push_back(
2337 {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
2338 }
2339
2340 if (MSSAU) {
2341 DT.applyUpdates(DTUpdates);
2342 DTUpdates.clear();
2343
2344 // Remove all but one edge to the retained block and all unswitched
2345 // blocks. This is to avoid having duplicate entries in the cloned Phis,
2346 // when we know we only keep a single edge for each case.
2347 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB);
2348 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2349 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB);
2350
2351 for (auto &VMap : VMaps)
2352 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2353 /*IgnoreIncomingWithNoClones=*/true);
2354 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2355
2356 // Remove all edges to unswitched blocks.
2357 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2358 MSSAU->removeEdge(ParentBB, SuccBB);
2359 }
2360
2361 // Now unhook the successor relationship as we'll be replacing
2362 // the terminator with a direct branch. This is much simpler for branches
2363 // than switches so we handle those first.
2364 if (BI) {
2365 // Remove the parent as a predecessor of the unswitched successor.
2366 assert(UnswitchedSuccBBs.size() == 1 &&(static_cast <bool> (UnswitchedSuccBBs.size() == 1 &&
"Only one possible unswitched block for a branch!") ? void (
0) : __assert_fail ("UnswitchedSuccBBs.size() == 1 && \"Only one possible unswitched block for a branch!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2367, __extension__
__PRETTY_FUNCTION__))
2367 "Only one possible unswitched block for a branch!")(static_cast <bool> (UnswitchedSuccBBs.size() == 1 &&
"Only one possible unswitched block for a branch!") ? void (
0) : __assert_fail ("UnswitchedSuccBBs.size() == 1 && \"Only one possible unswitched block for a branch!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2367, __extension__
__PRETTY_FUNCTION__))
;
2368 BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();
2369 UnswitchedSuccBB->removePredecessor(ParentBB,
2370 /*KeepOneInputPHIs*/ true);
2371 DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
2372 } else {
2373 // Note that we actually want to remove the parent block as a predecessor
2374 // of *every* case successor. The case successor is either unswitched,
2375 // completely eliminating an edge from the parent to that successor, or it
2376 // is a duplicate edge to the retained successor as the retained successor
2377 // is always the default successor and as we'll replace this with a direct
2378 // branch we no longer need the duplicate entries in the PHI nodes.
2379 SwitchInst *NewSI = cast<SwitchInst>(NewTI);
2380 assert(NewSI->getDefaultDest() == RetainedSuccBB &&(static_cast <bool> (NewSI->getDefaultDest() == RetainedSuccBB
&& "Not retaining default successor!") ? void (0) : __assert_fail
("NewSI->getDefaultDest() == RetainedSuccBB && \"Not retaining default successor!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2381, __extension__
__PRETTY_FUNCTION__))
2381 "Not retaining default successor!")(static_cast <bool> (NewSI->getDefaultDest() == RetainedSuccBB
&& "Not retaining default successor!") ? void (0) : __assert_fail
("NewSI->getDefaultDest() == RetainedSuccBB && \"Not retaining default successor!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2381, __extension__
__PRETTY_FUNCTION__))
;
2382 for (const auto &Case : NewSI->cases())
2383 Case.getCaseSuccessor()->removePredecessor(
2384 ParentBB,
2385 /*KeepOneInputPHIs*/ true);
2386
2387 // We need to use the set to populate domtree updates as even when there
2388 // are multiple cases pointing at the same successor we only want to
2389 // remove and insert one edge in the domtree.
2390 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2391 DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
2392 }
2393
2394 // After MSSAU update, remove the cloned terminator instruction NewTI.
2395 ParentBB->getTerminator()->eraseFromParent();
2396
2397 // Create a new unconditional branch to the continuing block (as opposed to
2398 // the one cloned).
2399 BranchInst::Create(RetainedSuccBB, ParentBB);
2400 } else {
2401 assert(BI && "Only branches have partial unswitching.")(static_cast <bool> (BI && "Only branches have partial unswitching."
) ? void (0) : __assert_fail ("BI && \"Only branches have partial unswitching.\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2401, __extension__
__PRETTY_FUNCTION__))
;
2402 assert(UnswitchedSuccBBs.size() == 1 &&(static_cast <bool> (UnswitchedSuccBBs.size() == 1 &&
"Only one possible unswitched block for a branch!") ? void (
0) : __assert_fail ("UnswitchedSuccBBs.size() == 1 && \"Only one possible unswitched block for a branch!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2403, __extension__
__PRETTY_FUNCTION__))
2403 "Only one possible unswitched block for a branch!")(static_cast <bool> (UnswitchedSuccBBs.size() == 1 &&
"Only one possible unswitched block for a branch!") ? void (
0) : __assert_fail ("UnswitchedSuccBBs.size() == 1 && \"Only one possible unswitched block for a branch!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2403, __extension__
__PRETTY_FUNCTION__))
;
2404 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2405 // When doing a partial unswitch, we have to do a bit more work to build up
2406 // the branch in the split block.
2407 if (PartiallyInvariant)
2408 buildPartialInvariantUnswitchConditionalBranch(
2409 *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
2410 else {
2411 buildPartialUnswitchConditionalBranch(
2412 *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
2413 FreezeLoopUnswitchCond, BI, &AC, DT);
2414 }
2415 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2416
2417 if (MSSAU) {
2418 DT.applyUpdates(DTUpdates);
2419 DTUpdates.clear();
2420
2421 // Perform MSSA cloning updates.
2422 for (auto &VMap : VMaps)
2423 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2424 /*IgnoreIncomingWithNoClones=*/true);
2425 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2426 }
2427 }
2428
2429 // Apply the updates accumulated above to get an up-to-date dominator tree.
2430 DT.applyUpdates(DTUpdates);
2431
2432 // Now that we have an accurate dominator tree, first delete the dead cloned
2433 // blocks so that we can accurately build any cloned loops. It is important to
2434 // not delete the blocks from the original loop yet because we still want to
2435 // reference the original loop to understand the cloned loop's structure.
2436 deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU);
2437
2438 // Build the cloned loop structure itself. This may be substantially
2439 // different from the original structure due to the simplified CFG. This also
2440 // handles inserting all the cloned blocks into the correct loops.
2441 SmallVector<Loop *, 4> NonChildClonedLoops;
2442 for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
2443 buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops);
2444
2445 // Now that our cloned loops have been built, we can update the original loop.
2446 // First we delete the dead blocks from it and then we rebuild the loop
2447 // structure taking these deletions into account.
2448 deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, SE,DestroyLoopCB);
2449
2450 if (MSSAU && VerifyMemorySSA)
2451 MSSAU->getMemorySSA()->verifyMemorySSA();
2452
2453 SmallVector<Loop *, 4> HoistedLoops;
2454 bool IsStillLoop =
2455 rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops, SE);
2456
2457 if (MSSAU && VerifyMemorySSA)
2458 MSSAU->getMemorySSA()->verifyMemorySSA();
2459
2460 // This transformation has a high risk of corrupting the dominator tree, and
2461 // the below steps to rebuild loop structures will result in hard to debug
2462 // errors in that case so verify that the dominator tree is sane first.
2463 // FIXME: Remove this when the bugs stop showing up and rely on existing
2464 // verification steps.
2465 assert(DT.verify(DominatorTree::VerificationLevel::Fast))(static_cast <bool> (DT.verify(DominatorTree::VerificationLevel
::Fast)) ? void (0) : __assert_fail ("DT.verify(DominatorTree::VerificationLevel::Fast)"
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2465, __extension__
__PRETTY_FUNCTION__))
;
2466
2467 if (BI && !PartiallyInvariant) {
2468 // If we unswitched a branch which collapses the condition to a known
2469 // constant we want to replace all the uses of the invariants within both
2470 // the original and cloned blocks. We do this here so that we can use the
2471 // now updated dominator tree to identify which side the users are on.
2472 assert(UnswitchedSuccBBs.size() == 1 &&(static_cast <bool> (UnswitchedSuccBBs.size() == 1 &&
"Only one possible unswitched block for a branch!") ? void (
0) : __assert_fail ("UnswitchedSuccBBs.size() == 1 && \"Only one possible unswitched block for a branch!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2473, __extension__
__PRETTY_FUNCTION__))
2473 "Only one possible unswitched block for a branch!")(static_cast <bool> (UnswitchedSuccBBs.size() == 1 &&
"Only one possible unswitched block for a branch!") ? void (
0) : __assert_fail ("UnswitchedSuccBBs.size() == 1 && \"Only one possible unswitched block for a branch!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2473, __extension__
__PRETTY_FUNCTION__))
;
2474 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2475
2476 // When considering multiple partially-unswitched invariants
2477 // we cant just go replace them with constants in both branches.
2478 //
2479 // For 'AND' we infer that true branch ("continue") means true
2480 // for each invariant operand.
2481 // For 'OR' we can infer that false branch ("continue") means false
2482 // for each invariant operand.
2483 // So it happens that for multiple-partial case we dont replace
2484 // in the unswitched branch.
2485 bool ReplaceUnswitched =
2486 FullUnswitch || (Invariants.size() == 1) || PartiallyInvariant;
2487
2488 ConstantInt *UnswitchedReplacement =
2489 Direction ? ConstantInt::getTrue(BI->getContext())
2490 : ConstantInt::getFalse(BI->getContext());
2491 ConstantInt *ContinueReplacement =
2492 Direction ? ConstantInt::getFalse(BI->getContext())
2493 : ConstantInt::getTrue(BI->getContext());
2494 for (Value *Invariant : Invariants) {
2495 assert(!isa<Constant>(Invariant) &&(static_cast <bool> (!isa<Constant>(Invariant) &&
"Should not be replacing constant values!") ? void (0) : __assert_fail
("!isa<Constant>(Invariant) && \"Should not be replacing constant values!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2496, __extension__
__PRETTY_FUNCTION__))
2496 "Should not be replacing constant values!")(static_cast <bool> (!isa<Constant>(Invariant) &&
"Should not be replacing constant values!") ? void (0) : __assert_fail
("!isa<Constant>(Invariant) && \"Should not be replacing constant values!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2496, __extension__
__PRETTY_FUNCTION__))
;
2497 // Use make_early_inc_range here as set invalidates the iterator.
2498 for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
2499 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
2500 if (!UserI)
2501 continue;
2502
2503 // Replace it with the 'continue' side if in the main loop body, and the
2504 // unswitched if in the cloned blocks.
2505 if (DT.dominates(LoopPH, UserI->getParent()))
2506 U.set(ContinueReplacement);
2507 else if (ReplaceUnswitched &&
2508 DT.dominates(ClonedPH, UserI->getParent()))
2509 U.set(UnswitchedReplacement);
2510 }
2511 }
2512 }
2513
2514 // We can change which blocks are exit blocks of all the cloned sibling
2515 // loops, the current loop, and any parent loops which shared exit blocks
2516 // with the current loop. As a consequence, we need to re-form LCSSA for
2517 // them. But we shouldn't need to re-form LCSSA for any child loops.
2518 // FIXME: This could be made more efficient by tracking which exit blocks are
2519 // new, and focusing on them, but that isn't likely to be necessary.
2520 //
2521 // In order to reasonably rebuild LCSSA we need to walk inside-out across the
2522 // loop nest and update every loop that could have had its exits changed. We
2523 // also need to cover any intervening loops. We add all of these loops to
2524 // a list and sort them by loop depth to achieve this without updating
2525 // unnecessary loops.
2526 auto UpdateLoop = [&](Loop &UpdateL) {
2527#ifndef NDEBUG
2528 UpdateL.verifyLoop();
2529 for (Loop *ChildL : UpdateL) {
2530 ChildL->verifyLoop();
2531 assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&(static_cast <bool> (ChildL->isRecursivelyLCSSAForm(
DT, LI) && "Perturbed a child loop's LCSSA form!") ? void
(0) : __assert_fail ("ChildL->isRecursivelyLCSSAForm(DT, LI) && \"Perturbed a child loop's LCSSA form!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2532, __extension__
__PRETTY_FUNCTION__))
2532 "Perturbed a child loop's LCSSA form!")(static_cast <bool> (ChildL->isRecursivelyLCSSAForm(
DT, LI) && "Perturbed a child loop's LCSSA form!") ? void
(0) : __assert_fail ("ChildL->isRecursivelyLCSSAForm(DT, LI) && \"Perturbed a child loop's LCSSA form!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2532, __extension__
__PRETTY_FUNCTION__))
;
2533 }
2534#endif
2535 // First build LCSSA for this loop so that we can preserve it when
2536 // forming dedicated exits. We don't want to perturb some other loop's
2537 // LCSSA while doing that CFG edit.
2538 formLCSSA(UpdateL, DT, &LI);
2539
2540 // For loops reached by this loop's original exit blocks we may
2541 // introduced new, non-dedicated exits. At least try to re-form dedicated
2542 // exits for these loops. This may fail if they couldn't have dedicated
2543 // exits to start with.
2544 formDedicatedExitBlocks(&UpdateL, &DT, &LI, MSSAU, /*PreserveLCSSA*/ true);
2545 };
2546
2547 // For non-child cloned loops and hoisted loops, we just need to update LCSSA
2548 // and we can do it in any order as they don't nest relative to each other.
2549 //
2550 // Also check if any of the loops we have updated have become top-level loops
2551 // as that will necessitate widening the outer loop scope.
2552 for (Loop *UpdatedL :
2553 llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
2554 UpdateLoop(*UpdatedL);
2555 if (UpdatedL->isOutermost())
2556 OuterExitL = nullptr;
2557 }
2558 if (IsStillLoop) {
2559 UpdateLoop(L);
2560 if (L.isOutermost())
2561 OuterExitL = nullptr;
2562 }
2563
2564 // If the original loop had exit blocks, walk up through the outer most loop
2565 // of those exit blocks to update LCSSA and form updated dedicated exits.
2566 if (OuterExitL != &L)
2567 for (Loop *OuterL = ParentL; OuterL != OuterExitL;
2568 OuterL = OuterL->getParentLoop())
2569 UpdateLoop(*OuterL);
2570
2571#ifndef NDEBUG
2572 // Verify the entire loop structure to catch any incorrect updates before we
2573 // progress in the pass pipeline.
2574 LI.verify(DT);
2575#endif
2576
2577 // Now that we've unswitched something, make callbacks to report the changes.
2578 // For that we need to merge together the updated loops and the cloned loops
2579 // and check whether the original loop survived.
2580 SmallVector<Loop *, 4> SibLoops;
2581 for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
2582 if (UpdatedL->getParentLoop() == ParentL)
2583 SibLoops.push_back(UpdatedL);
2584 UnswitchCB(IsStillLoop, PartiallyInvariant, SibLoops);
2585
2586 if (MSSAU && VerifyMemorySSA)
2587 MSSAU->getMemorySSA()->verifyMemorySSA();
2588
2589 if (BI)
2590 ++NumBranches;
2591 else
2592 ++NumSwitches;
2593}
2594
2595/// Recursively compute the cost of a dominator subtree based on the per-block
2596/// cost map provided.
2597///
2598/// The recursive computation is memozied into the provided DT-indexed cost map
2599/// to allow querying it for most nodes in the domtree without it becoming
2600/// quadratic.
2601static InstructionCost computeDomSubtreeCost(
2602 DomTreeNode &N,
2603 const SmallDenseMap<BasicBlock *, InstructionCost, 4> &BBCostMap,
2604 SmallDenseMap<DomTreeNode *, InstructionCost, 4> &DTCostMap) {
2605 // Don't accumulate cost (or recurse through) blocks not in our block cost
2606 // map and thus not part of the duplication cost being considered.
2607 auto BBCostIt = BBCostMap.find(N.getBlock());
2608 if (BBCostIt == BBCostMap.end())
2609 return 0;
2610
2611 // Lookup this node to see if we already computed its cost.
2612 auto DTCostIt = DTCostMap.find(&N);
2613 if (DTCostIt != DTCostMap.end())
2614 return DTCostIt->second;
2615
2616 // If not, we have to compute it. We can't use insert above and update
2617 // because computing the cost may insert more things into the map.
2618 InstructionCost Cost = std::accumulate(
2619 N.begin(), N.end(), BBCostIt->second,
2620 [&](InstructionCost Sum, DomTreeNode *ChildN) -> InstructionCost {
2621 return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
2622 });
2623 bool Inserted = DTCostMap.insert({&N, Cost}).second;
2624 (void)Inserted;
2625 assert(Inserted && "Should not insert a node while visiting children!")(static_cast <bool> (Inserted && "Should not insert a node while visiting children!"
) ? void (0) : __assert_fail ("Inserted && \"Should not insert a node while visiting children!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2625, __extension__
__PRETTY_FUNCTION__))
;
2626 return Cost;
2627}
2628
2629/// Turns a select instruction into implicit control flow branch,
2630/// making the following replacement:
2631///
2632/// head:
2633/// --code before select--
2634/// select %cond, %trueval, %falseval
2635/// --code after select--
2636///
2637/// into
2638///
2639/// head:
2640/// --code before select--
2641/// br i1 %cond, label %then, label %tail
2642///
2643/// then:
2644/// br %tail
2645///
2646/// tail:
2647/// phi [ %trueval, %then ], [ %falseval, %head]
2648/// unreachable
2649///
2650/// It also makes all relevant DT and LI updates, so that all structures are in
2651/// valid state after this transform.
2652static BranchInst *turnSelectIntoBranch(SelectInst *SI, DominatorTree &DT,
2653 LoopInfo &LI, MemorySSAUpdater *MSSAU,
2654 AssumptionCache *AC) {
2655 LLVM_DEBUG(dbgs() << "Turning " << *SI << " into a branch.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Turning " <<
*SI << " into a branch.\n"; } } while (false)
;
2656 BasicBlock *HeadBB = SI->getParent();
2657
2658 DomTreeUpdater DTU =
2659 DomTreeUpdater(DT, DomTreeUpdater::UpdateStrategy::Eager);
2660 SplitBlockAndInsertIfThen(SI->getCondition(), SI, false,
2661 SI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);
2662 auto *CondBr = cast<BranchInst>(HeadBB->getTerminator());
2663 BasicBlock *ThenBB = CondBr->getSuccessor(0),
2664 *TailBB = CondBr->getSuccessor(1);
2665 if (MSSAU)
2666 MSSAU->moveAllAfterSpliceBlocks(HeadBB, TailBB, SI);
2667
2668 PHINode *Phi = PHINode::Create(SI->getType(), 2, "unswitched.select", SI);
2669 Phi->addIncoming(SI->getTrueValue(), ThenBB);
2670 Phi->addIncoming(SI->getFalseValue(), HeadBB);
2671 SI->replaceAllUsesWith(Phi);
2672 SI->eraseFromParent();
2673
2674 if (MSSAU && VerifyMemorySSA)
2675 MSSAU->getMemorySSA()->verifyMemorySSA();
2676
2677 ++NumSelects;
2678 return CondBr;
2679}
2680
2681/// Turns a llvm.experimental.guard intrinsic into implicit control flow branch,
2682/// making the following replacement:
2683///
2684/// --code before guard--
2685/// call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
2686/// --code after guard--
2687///
2688/// into
2689///
2690/// --code before guard--
2691/// br i1 %cond, label %guarded, label %deopt
2692///
2693/// guarded:
2694/// --code after guard--
2695///
2696/// deopt:
2697/// call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
2698/// unreachable
2699///
2700/// It also makes all relevant DT and LI updates, so that all structures are in
2701/// valid state after this transform.
2702static BranchInst *turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
2703 DominatorTree &DT, LoopInfo &LI,
2704 MemorySSAUpdater *MSSAU) {
2705 SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
2706 LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Turning " <<
*GI << " into a branch.\n"; } } while (false)
;
2707 BasicBlock *CheckBB = GI->getParent();
2708
2709 if (MSSAU && VerifyMemorySSA)
2710 MSSAU->getMemorySSA()->verifyMemorySSA();
2711
2712 // Remove all CheckBB's successors from DomTree. A block can be seen among
2713 // successors more than once, but for DomTree it should be added only once.
2714 SmallPtrSet<BasicBlock *, 4> Successors;
2715 for (auto *Succ : successors(CheckBB))
2716 if (Successors.insert(Succ).second)
2717 DTUpdates.push_back({DominatorTree::Delete, CheckBB, Succ});
2718
2719 Instruction *DeoptBlockTerm =
2720 SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true);
2721 BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
2722 // SplitBlockAndInsertIfThen inserts control flow that branches to
2723 // DeoptBlockTerm if the condition is true. We want the opposite.
2724 CheckBI->swapSuccessors();
2725
2726 BasicBlock *GuardedBlock = CheckBI->getSuccessor(0);
2727 GuardedBlock->setName("guarded");
2728 CheckBI->getSuccessor(1)->setName("deopt");
2729 BasicBlock *DeoptBlock = CheckBI->getSuccessor(1);
2730
2731 if (MSSAU)
2732 MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI);
2733
2734 GI->moveBefore(DeoptBlockTerm);
2735 GI->setArgOperand(0, ConstantInt::getFalse(GI->getContext()));
2736
2737 // Add new successors of CheckBB into DomTree.
2738 for (auto *Succ : successors(CheckBB))
2739 DTUpdates.push_back({DominatorTree::Insert, CheckBB, Succ});
2740
2741 // Now the blocks that used to be CheckBB's successors are GuardedBlock's
2742 // successors.
2743 for (auto *Succ : Successors)
2744 DTUpdates.push_back({DominatorTree::Insert, GuardedBlock, Succ});
2745
2746 // Make proper changes to DT.
2747 DT.applyUpdates(DTUpdates);
2748 // Inform LI of a new loop block.
2749 L.addBasicBlockToLoop(GuardedBlock, LI);
2750
2751 if (MSSAU) {
2752 MemoryDef *MD = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(GI));
2753 MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::BeforeTerminator);
2754 if (VerifyMemorySSA)
2755 MSSAU->getMemorySSA()->verifyMemorySSA();
2756 }
2757
2758 ++NumGuards;
2759 return CheckBI;
2760}
2761
2762/// Cost multiplier is a way to limit potentially exponential behavior
2763/// of loop-unswitch. Cost is multipied in proportion of 2^number of unswitch
2764/// candidates available. Also accounting for the number of "sibling" loops with
2765/// the idea to account for previous unswitches that already happened on this
2766/// cluster of loops. There was an attempt to keep this formula simple,
2767/// just enough to limit the worst case behavior. Even if it is not that simple
2768/// now it is still not an attempt to provide a detailed heuristic size
2769/// prediction.
2770///
2771/// TODO: Make a proper accounting of "explosion" effect for all kinds of
2772/// unswitch candidates, making adequate predictions instead of wild guesses.
2773/// That requires knowing not just the number of "remaining" candidates but
2774/// also costs of unswitching for each of these candidates.
2775static int CalculateUnswitchCostMultiplier(
2776 const Instruction &TI, const Loop &L, const LoopInfo &LI,
2777 const DominatorTree &DT,
2778 ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates) {
2779
2780 // Guards and other exiting conditions do not contribute to exponential
2781 // explosion as soon as they dominate the latch (otherwise there might be
2782 // another path to the latch remaining that does not allow to eliminate the
2783 // loop copy on unswitch).
2784 const BasicBlock *Latch = L.getLoopLatch();
2785 const BasicBlock *CondBlock = TI.getParent();
2786 if (DT.dominates(CondBlock, Latch) &&
2787 (isGuard(&TI) ||
2788 (TI.isTerminator() &&
2789 llvm::count_if(successors(&TI), [&L](const BasicBlock *SuccBB) {
2790 return L.contains(SuccBB);
2791 }) <= 1))) {
2792 NumCostMultiplierSkipped++;
2793 return 1;
2794 }
2795
2796 auto *ParentL = L.getParentLoop();
2797 int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
2798 : std::distance(LI.begin(), LI.end()));
2799 // Count amount of clones that all the candidates might cause during
2800 // unswitching. Branch/guard/select counts as 1, switch counts as log2 of its
2801 // cases.
2802 int UnswitchedClones = 0;
2803 for (const auto &Candidate : UnswitchCandidates) {
2804 const Instruction *CI = Candidate.TI;
2805 const BasicBlock *CondBlock = CI->getParent();
2806 bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
2807 if (isa<SelectInst>(CI)) {
2808 UnswitchedClones++;
2809 continue;
2810 }
2811 if (isGuard(CI)) {
2812 if (!SkipExitingSuccessors)
2813 UnswitchedClones++;
2814 continue;
2815 }
2816 int NonExitingSuccessors =
2817 llvm::count_if(successors(CondBlock),
2818 [SkipExitingSuccessors, &L](const BasicBlock *SuccBB) {
2819 return !SkipExitingSuccessors || L.contains(SuccBB);
2820 });
2821 UnswitchedClones += Log2_32(NonExitingSuccessors);
2822 }
2823
2824 // Ignore up to the "unscaled candidates" number of unswitch candidates
2825 // when calculating the power-of-two scaling of the cost. The main idea
2826 // with this control is to allow a small number of unswitches to happen
2827 // and rely more on siblings multiplier (see below) when the number
2828 // of candidates is small.
2829 unsigned ClonesPower =
2830 std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0);
2831
2832 // Allowing top-level loops to spread a bit more than nested ones.
2833 int SiblingsMultiplier =
2834 std::max((ParentL ? SiblingsCount
2835 : SiblingsCount / (int)UnswitchSiblingsToplevelDiv),
2836 1);
2837 // Compute the cost multiplier in a way that won't overflow by saturating
2838 // at an upper bound.
2839 int CostMultiplier;
2840 if (ClonesPower > Log2_32(UnswitchThreshold) ||
2841 SiblingsMultiplier > UnswitchThreshold)
2842 CostMultiplier = UnswitchThreshold;
2843 else
2844 CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
2845 (int)UnswitchThreshold);
2846
2847 LLVM_DEBUG(dbgs() << " Computed multiplier " << CostMultiplierdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Computed multiplier "
<< CostMultiplier << " (siblings " << SiblingsMultiplier
<< " * clones " << (1 << ClonesPower) <<
")" << " for unswitch candidate: " << TI <<
"\n"; } } while (false)
2848 << " (siblings " << SiblingsMultiplier << " * clones "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Computed multiplier "
<< CostMultiplier << " (siblings " << SiblingsMultiplier
<< " * clones " << (1 << ClonesPower) <<
")" << " for unswitch candidate: " << TI <<
"\n"; } } while (false)
2849 << (1 << ClonesPower) << ")"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Computed multiplier "
<< CostMultiplier << " (siblings " << SiblingsMultiplier
<< " * clones " << (1 << ClonesPower) <<
")" << " for unswitch candidate: " << TI <<
"\n"; } } while (false)
2850 << " for unswitch candidate: " << TI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Computed multiplier "
<< CostMultiplier << " (siblings " << SiblingsMultiplier
<< " * clones " << (1 << ClonesPower) <<
")" << " for unswitch candidate: " << TI <<
"\n"; } } while (false)
;
2851 return CostMultiplier;
2852}
2853
2854static bool collectUnswitchCandidates(
2855 SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates,
2856 IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch,
2857 const Loop &L, const LoopInfo &LI, AAResults &AA,
2858 const MemorySSAUpdater *MSSAU) {
2859 assert(UnswitchCandidates.empty() && "Should be!")(static_cast <bool> (UnswitchCandidates.empty() &&
"Should be!") ? void (0) : __assert_fail ("UnswitchCandidates.empty() && \"Should be!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 2859, __extension__
__PRETTY_FUNCTION__))
;
23
'?' condition is true
2860 // Whether or not we should also collect guards in the loop.
2861 bool CollectGuards = false;
2862 if (UnswitchGuards) {
24
Assuming the condition is false
25
Taking false branch
2863 auto *GuardDecl = L.getHeader()->getParent()->getParent()->getFunction(
2864 Intrinsic::getName(Intrinsic::experimental_guard));
2865 if (GuardDecl && !GuardDecl->use_empty())
2866 CollectGuards = true;
2867 }
2868
2869 for (auto *BB : L.blocks()) {
26
Assuming '__begin1' is equal to '__end1'
2870 if (LI.getLoopFor(BB) != &L)
2871 continue;
2872
2873 for (auto &I : *BB) {
2874 if (auto *SI = dyn_cast<SelectInst>(&I)) {
2875 auto *Cond = SI->getCondition();
2876 if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
2877 UnswitchCandidates.push_back({&I, {Cond}});
2878 } else if (CollectGuards && isGuard(&I)) {
2879 auto *Cond =
2880 skipTrivialSelect(cast<IntrinsicInst>(&I)->getArgOperand(0));
2881 // TODO: Support AND, OR conditions and partial unswitching.
2882 if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
2883 UnswitchCandidates.push_back({&I, {Cond}});
2884 }
2885 }
2886
2887 if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
2888 // We can only consider fully loop-invariant switch conditions as we need
2889 // to completely eliminate the switch after unswitching.
2890 if (!isa<Constant>(SI->getCondition()) &&
2891 L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
2892 UnswitchCandidates.push_back({SI, {SI->getCondition()}});
2893 continue;
2894 }
2895
2896 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
2897 if (!BI || !BI->isConditional() || isa<Constant>(BI->getCondition()) ||
2898 BI->getSuccessor(0) == BI->getSuccessor(1))
2899 continue;
2900
2901 Value *Cond = skipTrivialSelect(BI->getCondition());
2902 if (isa<Constant>(Cond))
2903 continue;
2904
2905 if (L.isLoopInvariant(Cond)) {
2906 UnswitchCandidates.push_back({BI, {Cond}});
2907 continue;
2908 }
2909
2910 Instruction &CondI = *cast<Instruction>(Cond);
2911 if (match(&CondI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()))) {
2912 TinyPtrVector<Value *> Invariants =
2913 collectHomogenousInstGraphLoopInvariants(L, CondI, LI);
2914 if (Invariants.empty())
2915 continue;
2916
2917 UnswitchCandidates.push_back({BI, std::move(Invariants)});
2918 continue;
2919 }
2920 }
2921
2922 if (MSSAU
26.1
'MSSAU' is null
26.1
'MSSAU' is null
26.1
'MSSAU' is null
&& !findOptionMDForLoop(&L, "llvm.loop.unswitch.partial.disable") &&
2923 !any_of(UnswitchCandidates, [&L](auto &TerminatorAndInvariants) {
2924 return TerminatorAndInvariants.TI == L.getHeader()->getTerminator();
2925 })) {
2926 MemorySSA *MSSA = MSSAU->getMemorySSA();
2927 if (auto Info = hasPartialIVCondition(L, MSSAThreshold, *MSSA, AA)) {
2928 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "simple-loop-unswitch: Found partially invariant condition "
<< *Info->InstToDuplicate[0] << "\n"; } } while
(false)
2929 dbgs() << "simple-loop-unswitch: Found partially invariant condition "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "simple-loop-unswitch: Found partially invariant condition "
<< *Info->InstToDuplicate[0] << "\n"; } } while
(false)
2930 << *Info->InstToDuplicate[0] << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "simple-loop-unswitch: Found partially invariant condition "
<< *Info->InstToDuplicate[0] << "\n"; } } while
(false)
;
2931 PartialIVInfo = *Info;
2932 PartialIVCondBranch = L.getHeader()->getTerminator();
2933 TinyPtrVector<Value *> ValsToDuplicate;
2934 llvm::append_range(ValsToDuplicate, Info->InstToDuplicate);
2935 UnswitchCandidates.push_back(
2936 {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
2937 }
2938 }
2939 return !UnswitchCandidates.empty();
27
Returning without writing to 'PartialIVInfo.KnownValue'
2940}
2941
2942/// Tries to canonicalize condition described by:
2943///
2944/// br (LHS pred RHS), label IfTrue, label IfFalse
2945///
2946/// into its equivalent where `Pred` is something that we support for injected
2947/// invariants (so far it is limited to ult), LHS in canonicalized form is
2948/// non-invariant and RHS is an invariant.
2949static void canonicalizeForInvariantConditionInjection(
2950 ICmpInst::Predicate &Pred, Value *&LHS, Value *&RHS, BasicBlock *&IfTrue,
2951 BasicBlock *&IfFalse, const Loop &L) {
2952 if (!L.contains(IfTrue)) {
2953 Pred = ICmpInst::getInversePredicate(Pred);
2954 std::swap(IfTrue, IfFalse);
2955 }
2956
2957 // Move loop-invariant argument to RHS position.
2958 if (L.isLoopInvariant(LHS)) {
2959 Pred = ICmpInst::getSwappedPredicate(Pred);
2960 std::swap(LHS, RHS);
2961 }
2962
2963 if (Pred == ICmpInst::ICMP_SGE && match(RHS, m_Zero())) {
2964 // Turn "x >=s 0" into "x <u UMIN_INT"
2965 Pred = ICmpInst::ICMP_ULT;
2966 RHS = ConstantInt::get(
2967 RHS->getContext(),
2968 APInt::getSignedMinValue(RHS->getType()->getIntegerBitWidth()));
2969 }
2970}
2971
2972/// Returns true, if predicate described by ( \p Pred, \p LHS, \p RHS )
2973/// succeeding into blocks ( \p IfTrue, \p IfFalse) can be optimized by
2974/// injecting a loop-invariant condition.
2975static bool shouldTryInjectInvariantCondition(
2976 const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS,
2977 const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L) {
2978 if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS))
2979 return false;
2980 // TODO: Support other predicates.
2981 if (Pred != ICmpInst::ICMP_ULT)
2982 return false;
2983 // TODO: Support non-loop-exiting branches?
2984 if (!L.contains(IfTrue) || L.contains(IfFalse))
2985 return false;
2986 // FIXME: For some reason this causes problems with MSSA updates, need to
2987 // investigate why. So far, just don't unswitch latch.
2988 if (L.getHeader() == IfTrue)
2989 return false;
2990 return true;
2991}
2992
2993/// Returns true, if metadata on \p BI allows us to optimize branching into \p
2994/// TakenSucc via injection of invariant conditions. The branch should be not
2995/// enough and not previously unswitched, the information about this comes from
2996/// the metadata.
2997bool shouldTryInjectBasingOnMetadata(const BranchInst *BI,
2998 const BasicBlock *TakenSucc) {
2999 // Skip branches that have already been unswithed this way. After successful
3000 // unswitching of injected condition, we will still have a copy of this loop
3001 // which looks exactly the same as original one. To prevent the 2nd attempt
3002 // of unswitching it in the same pass, mark this branch as "nothing to do
3003 // here".
3004 if (BI->hasMetadata("llvm.invariant.condition.injection.disabled"))
3005 return false;
3006 SmallVector<uint32_t> Weights;
3007 if (!extractBranchWeights(*BI, Weights))
3008 return false;
3009 unsigned T = InjectInvariantConditionHotnesThreshold;
3010 BranchProbability LikelyTaken(T - 1, T);
3011
3012 assert(Weights.size() == 2 && "Unexpected profile data!")(static_cast <bool> (Weights.size() == 2 && "Unexpected profile data!"
) ? void (0) : __assert_fail ("Weights.size() == 2 && \"Unexpected profile data!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3012, __extension__
__PRETTY_FUNCTION__))
;
3013 size_t Idx = BI->getSuccessor(0) == TakenSucc ? 0 : 1;
3014 auto Num = Weights[Idx];
3015 auto Denom = Weights[0] + Weights[1];
3016 // Degenerate or overflowed metadata.
3017 if (Denom == 0 || Num > Denom)
3018 return false;
3019 BranchProbability ActualTaken(Num, Denom);
3020 if (LikelyTaken > ActualTaken)
3021 return false;
3022 return true;
3023}
3024
3025/// Materialize pending invariant condition of the given candidate into IR. The
3026/// injected loop-invariant condition implies the original loop-variant branch
3027/// condition, so the materialization turns
3028///
3029/// loop_block:
3030/// ...
3031/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
3032///
3033/// into
3034///
3035/// preheader:
3036/// %invariant_cond = LHS pred RHS
3037/// ...
3038/// loop_block:
3039/// br i1 %invariant_cond, label InLoopSucc, label OriginalCheck
3040/// OriginalCheck:
3041/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
3042/// ...
3043static NonTrivialUnswitchCandidate
3044injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L,
3045 DominatorTree &DT, LoopInfo &LI,
3046 AssumptionCache &AC, MemorySSAUpdater *MSSAU) {
3047 assert(Candidate.hasPendingInjection() && "Nothing to inject!")(static_cast <bool> (Candidate.hasPendingInjection() &&
"Nothing to inject!") ? void (0) : __assert_fail ("Candidate.hasPendingInjection() && \"Nothing to inject!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3047, __extension__
__PRETTY_FUNCTION__))
;
3048 BasicBlock *Preheader = L.getLoopPreheader();
3049 assert(Preheader && "Loop is not in simplified form?")(static_cast <bool> (Preheader && "Loop is not in simplified form?"
) ? void (0) : __assert_fail ("Preheader && \"Loop is not in simplified form?\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3049, __extension__
__PRETTY_FUNCTION__))
;
3050 assert(LI.getLoopFor(Candidate.TI->getParent()) == &L &&(static_cast <bool> (LI.getLoopFor(Candidate.TI->getParent
()) == &L && "Unswitching branch of inner loop!")
? void (0) : __assert_fail ("LI.getLoopFor(Candidate.TI->getParent()) == &L && \"Unswitching branch of inner loop!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3051, __extension__
__PRETTY_FUNCTION__))
3051 "Unswitching branch of inner loop!")(static_cast <bool> (LI.getLoopFor(Candidate.TI->getParent
()) == &L && "Unswitching branch of inner loop!")
? void (0) : __assert_fail ("LI.getLoopFor(Candidate.TI->getParent()) == &L && \"Unswitching branch of inner loop!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3051, __extension__
__PRETTY_FUNCTION__))
;
3052
3053 auto Pred = Candidate.PendingInjection->Pred;
3054 auto *LHS = Candidate.PendingInjection->LHS;
3055 auto *RHS = Candidate.PendingInjection->RHS;
3056 auto *InLoopSucc = Candidate.PendingInjection->InLoopSucc;
3057 auto *TI = cast<BranchInst>(Candidate.TI);
3058 auto *BB = Candidate.TI->getParent();
3059 auto *OutOfLoopSucc = InLoopSucc == TI->getSuccessor(0) ? TI->getSuccessor(1)
3060 : TI->getSuccessor(0);
3061 // FIXME: Remove this once limitation on successors is lifted.
3062 assert(L.contains(InLoopSucc) && "Not supported yet!")(static_cast <bool> (L.contains(InLoopSucc) && "Not supported yet!"
) ? void (0) : __assert_fail ("L.contains(InLoopSucc) && \"Not supported yet!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3062, __extension__
__PRETTY_FUNCTION__))
;
3063 assert(!L.contains(OutOfLoopSucc) && "Not supported yet!")(static_cast <bool> (!L.contains(OutOfLoopSucc) &&
"Not supported yet!") ? void (0) : __assert_fail ("!L.contains(OutOfLoopSucc) && \"Not supported yet!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3063, __extension__
__PRETTY_FUNCTION__))
;
3064 auto &Ctx = BB->getContext();
3065
3066 IRBuilder<> Builder(Preheader->getTerminator());
3067 assert(ICmpInst::isUnsigned(Pred) && "Not supported yet!")(static_cast <bool> (ICmpInst::isUnsigned(Pred) &&
"Not supported yet!") ? void (0) : __assert_fail ("ICmpInst::isUnsigned(Pred) && \"Not supported yet!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3067, __extension__
__PRETTY_FUNCTION__))
;
3068 if (LHS->getType() != RHS->getType()) {
3069 if (LHS->getType()->getIntegerBitWidth() <
3070 RHS->getType()->getIntegerBitWidth())
3071 LHS = Builder.CreateZExt(LHS, RHS->getType(), LHS->getName() + ".wide");
3072 else
3073 RHS = Builder.CreateZExt(RHS, LHS->getType(), RHS->getName() + ".wide");
3074 }
3075 // Do not use builder here: CreateICmp may simplify this into a constant and
3076 // unswitching will break. Better optimize it away later.
3077 auto *InjectedCond =
3078 ICmpInst::Create(Instruction::ICmp, Pred, LHS, RHS, "injected.cond",
3079 Preheader->getTerminator());
3080 auto *OldCond = TI->getCondition();
3081
3082 BasicBlock *CheckBlock = BasicBlock::Create(Ctx, BB->getName() + ".check",
3083 BB->getParent(), InLoopSucc);
3084 Builder.SetInsertPoint(TI);
3085 auto *InvariantBr =
3086 Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);
3087
3088 Builder.SetInsertPoint(CheckBlock);
3089 auto *NewTerm = Builder.CreateCondBr(OldCond, InLoopSucc, OutOfLoopSucc);
3090
3091 TI->eraseFromParent();
3092 // Prevent infinite unswitching.
3093 NewTerm->setMetadata("llvm.invariant.condition.injection.disabled",
3094 MDNode::get(BB->getContext(), {}));
3095
3096 // Fixup phis.
3097 for (auto &I : *InLoopSucc) {
3098 auto *PN = dyn_cast<PHINode>(&I);
3099 if (!PN)
3100 break;
3101 auto *Inc = PN->getIncomingValueForBlock(BB);
3102 PN->addIncoming(Inc, CheckBlock);
3103 }
3104 OutOfLoopSucc->replacePhiUsesWith(BB, CheckBlock);
3105
3106 SmallVector<DominatorTree::UpdateType, 4> DTUpdates = {
3107 { DominatorTree::Insert, BB, CheckBlock },
3108 { DominatorTree::Insert, CheckBlock, InLoopSucc },
3109 { DominatorTree::Insert, CheckBlock, OutOfLoopSucc },
3110 { DominatorTree::Delete, BB, OutOfLoopSucc }
3111 };
3112
3113 DT.applyUpdates(DTUpdates);
3114 if (MSSAU)
3115 MSSAU->applyUpdates(DTUpdates, DT);
3116 L.addBasicBlockToLoop(CheckBlock, LI);
3117
3118#ifndef NDEBUG
3119 DT.verify();
3120 LI.verify(DT);
3121 if (MSSAU && VerifyMemorySSA)
3122 MSSAU->getMemorySSA()->verifyMemorySSA();
3123#endif
3124
3125 // TODO: In fact, cost of unswitching a new invariant candidate is *slightly*
3126 // higher because we have just inserted a new block. Need to think how to
3127 // adjust the cost of injected candidates when it was first computed.
3128 LLVM_DEBUG(dbgs() << "Injected a new loop-invariant branch " << *InvariantBrdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Injected a new loop-invariant branch "
<< *InvariantBr << " and considering it for unswitching."
; } } while (false)
3129 << " and considering it for unswitching.")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Injected a new loop-invariant branch "
<< *InvariantBr << " and considering it for unswitching."
; } } while (false)
;
3130 ++NumInvariantConditionsInjected;
3131 return NonTrivialUnswitchCandidate(InvariantBr, { InjectedCond },
3132 Candidate.Cost);
3133}
3134
3135/// Given chain of loop branch conditions looking like:
3136/// br (Variant < Invariant1)
3137/// br (Variant < Invariant2)
3138/// br (Variant < Invariant3)
3139/// ...
3140/// collect set of invariant conditions on which we want to unswitch, which
3141/// look like:
3142/// Invariant1 <= Invariant2
3143/// Invariant2 <= Invariant3
3144/// ...
3145/// Though they might not immediately exist in the IR, we can still inject them.
3146static bool insertCandidatesWithPendingInjections(
3147 SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates, Loop &L,
3148 ICmpInst::Predicate Pred, ArrayRef<CompareDesc> Compares,
3149 const DominatorTree &DT) {
3150
3151 assert(ICmpInst::isRelational(Pred))(static_cast <bool> (ICmpInst::isRelational(Pred)) ? void
(0) : __assert_fail ("ICmpInst::isRelational(Pred)", "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 3151, __extension__ __PRETTY_FUNCTION__))
;
3152 assert(ICmpInst::isStrictPredicate(Pred))(static_cast <bool> (ICmpInst::isStrictPredicate(Pred))
? void (0) : __assert_fail ("ICmpInst::isStrictPredicate(Pred)"
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3152, __extension__
__PRETTY_FUNCTION__))
;
3153 if (Compares.size() < 2)
3154 return false;
3155 ICmpInst::Predicate NonStrictPred = ICmpInst::getNonStrictPredicate(Pred);
3156 for (auto Prev = Compares.begin(), Next = Compares.begin() + 1;
3157 Next != Compares.end(); ++Prev, ++Next) {
3158 Value *LHS = Next->Invariant;
3159 Value *RHS = Prev->Invariant;
3160 BasicBlock *InLoopSucc = Prev->InLoopSucc;
3161 InjectedInvariant ToInject(NonStrictPred, LHS, RHS, InLoopSucc);
3162 NonTrivialUnswitchCandidate Candidate(Prev->Term, { LHS, RHS },
3163 std::nullopt, std::move(ToInject));
3164 UnswitchCandidates.push_back(std::move(Candidate));
3165 }
3166 return true;
3167}
3168
3169/// Collect unswitch candidates by invariant conditions that are not immediately
3170/// present in the loop. However, they can be injected into the code if we
3171/// decide it's profitable.
3172/// An example of such conditions is following:
3173///
3174/// for (...) {
3175/// x = load ...
3176/// if (! x <u C1) break;
3177/// if (! x <u C2) break;
3178/// <do something>
3179/// }
3180///
3181/// We can unswitch by condition "C1 <=u C2". If that is true, then "x <u C1 <=
3182/// C2" automatically implies "x <u C2", so we can get rid of one of
3183/// loop-variant checks in unswitched loop version.
3184static bool collectUnswitchCandidatesWithInjections(
3185 SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates,
3186 IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L,
3187 const DominatorTree &DT, const LoopInfo &LI, AAResults &AA,
3188 const MemorySSAUpdater *MSSAU) {
3189 if (!InjectInvariantConditions)
30
Assuming the condition is false
31
Taking false branch
3190 return false;
3191
3192 if (!DT.isReachableFromEntry(L.getHeader()))
32
Assuming the condition is false
33
Taking false branch
3193 return false;
3194 auto *Latch = L.getLoopLatch();
3195 // Need to have a single latch and a preheader.
3196 if (!Latch)
34
Assuming 'Latch' is non-null
3197 return false;
3198 assert(L.getLoopPreheader() && "Must have a preheader!")(static_cast <bool> (L.getLoopPreheader() && "Must have a preheader!"
) ? void (0) : __assert_fail ("L.getLoopPreheader() && \"Must have a preheader!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3198, __extension__
__PRETTY_FUNCTION__))
;
35
Taking false branch
36
Assuming the condition is true
37
'?' condition is true
3199
3200 DenseMap<Value *, SmallVector<CompareDesc, 4> > CandidatesULT;
3201 // Traverse the conditions that dominate latch (and therefore dominate each
3202 // other).
3203 for (auto *DTN = DT.getNode(Latch); L.contains(DTN->getBlock());
38
Loop condition is false. Execution continues on line 3232
3204 DTN = DTN->getIDom()) {
3205 ICmpInst::Predicate Pred;
3206 Value *LHS = nullptr, *RHS = nullptr;
3207 BasicBlock *IfTrue = nullptr, *IfFalse = nullptr;
3208 auto *BB = DTN->getBlock();
3209 // Ignore inner loops.
3210 if (LI.getLoopFor(BB) != &L)
3211 continue;
3212 auto *Term = BB->getTerminator();
3213 if (!match(Term, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
3214 m_BasicBlock(IfTrue), m_BasicBlock(IfFalse))))
3215 continue;
3216 if (!LHS->getType()->isIntegerTy())
3217 continue;
3218 canonicalizeForInvariantConditionInjection(Pred, LHS, RHS, IfTrue, IfFalse,
3219 L);
3220 if (!shouldTryInjectInvariantCondition(Pred, LHS, RHS, IfTrue, IfFalse, L))
3221 continue;
3222 if (!shouldTryInjectBasingOnMetadata(cast<BranchInst>(Term), IfTrue))
3223 continue;
3224 // Strip ZEXT for unsigned predicate.
3225 // TODO: once signed predicates are supported, also strip SEXT.
3226 CompareDesc Desc(cast<BranchInst>(Term), RHS, IfTrue);
3227 while (auto *Zext = dyn_cast<ZExtInst>(LHS))
3228 LHS = Zext->getOperand(0);
3229 CandidatesULT[LHS].push_back(Desc);
3230 }
3231
3232 bool Found = false;
3233 for (auto &It : CandidatesULT)
3234 Found |= insertCandidatesWithPendingInjections(
3235 UnswitchCandidates, L, ICmpInst::ICMP_ULT, It.second, DT);
3236 return Found;
39
Returning without writing to 'PartialIVInfo.KnownValue'
3237}
3238
3239static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI) {
3240 if (!L.isSafeToClone())
3241 return false;
3242 for (auto *BB : L.blocks())
3243 for (auto &I : *BB) {
3244 if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
3245 return false;
3246 if (auto *CB = dyn_cast<CallBase>(&I)) {
3247 assert(!CB->cannotDuplicate() && "Checked by L.isSafeToClone().")(static_cast <bool> (!CB->cannotDuplicate() &&
"Checked by L.isSafeToClone().") ? void (0) : __assert_fail (
"!CB->cannotDuplicate() && \"Checked by L.isSafeToClone().\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3247, __extension__
__PRETTY_FUNCTION__))
;
3248 if (CB->isConvergent())
3249 return false;
3250 }
3251 }
3252
3253 // Check if there are irreducible CFG cycles in this loop. If so, we cannot
3254 // easily unswitch non-trivial edges out of the loop. Doing so might turn the
3255 // irreducible control flow into reducible control flow and introduce new
3256 // loops "out of thin air". If we ever discover important use cases for doing
3257 // this, we can add support to loop unswitch, but it is a lot of complexity
3258 // for what seems little or no real world benefit.
3259 LoopBlocksRPO RPOT(&L);
3260 RPOT.perform(&LI);
3261 if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
3262 return false;
3263
3264 SmallVector<BasicBlock *, 4> ExitBlocks;
3265 L.getUniqueExitBlocks(ExitBlocks);
3266 // We cannot unswitch if exit blocks contain a cleanuppad/catchswitch
3267 // instruction as we don't know how to split those exit blocks.
3268 // FIXME: We should teach SplitBlock to handle this and remove this
3269 // restriction.
3270 for (auto *ExitBB : ExitBlocks) {
3271 auto *I = ExitBB->getFirstNonPHI();
3272 if (isa<CleanupPadInst>(I) || isa<CatchSwitchInst>(I)) {
3273 LLVM_DEBUG(dbgs() << "Cannot unswitch because of cleanuppad/catchswitch "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Cannot unswitch because of cleanuppad/catchswitch "
"in exit block\n"; } } while (false)
3274 "in exit block\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Cannot unswitch because of cleanuppad/catchswitch "
"in exit block\n"; } } while (false)
;
3275 return false;
3276 }
3277 }
3278
3279 return true;
3280}
3281
3282static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(
3283 ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates, const Loop &L,
3284 const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC,
3285 const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo) {
3286 // Given that unswitching these terminators will require duplicating parts of
3287 // the loop, so we need to be able to model that cost. Compute the ephemeral
3288 // values and set up a data structure to hold per-BB costs. We cache each
3289 // block's cost so that we don't recompute this when considering different
3290 // subsets of the loop for duplication during unswitching.
3291 SmallPtrSet<const Value *, 4> EphValues;
3292 CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
3293 SmallDenseMap<BasicBlock *, InstructionCost, 4> BBCostMap;
3294
3295 // Compute the cost of each block, as well as the total loop cost. Also, bail
3296 // out if we see instructions which are incompatible with loop unswitching
3297 // (convergent, noduplicate, or cross-basic-block tokens).
3298 // FIXME: We might be able to safely handle some of these in non-duplicated
3299 // regions.
3300 TargetTransformInfo::TargetCostKind CostKind =
3301 L.getHeader()->getParent()->hasMinSize()
45
Assuming the condition is false
46
'?' condition is false
3302 ? TargetTransformInfo::TCK_CodeSize
3303 : TargetTransformInfo::TCK_SizeAndLatency;
3304 InstructionCost LoopCost = 0;
3305 for (auto *BB : L.blocks()) {
47
Assuming '__begin1' is equal to '__end1'
3306 InstructionCost Cost = 0;
3307 for (auto &I : *BB) {
3308 if (EphValues.count(&I))
3309 continue;
3310 Cost += TTI.getInstructionCost(&I, CostKind);
3311 }
3312 assert(Cost >= 0 && "Must not have negative costs!")(static_cast <bool> (Cost >= 0 && "Must not have negative costs!"
) ? void (0) : __assert_fail ("Cost >= 0 && \"Must not have negative costs!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3312, __extension__
__PRETTY_FUNCTION__))
;
3313 LoopCost += Cost;
3314 assert(LoopCost >= 0 && "Must not have negative loop costs!")(static_cast <bool> (LoopCost >= 0 && "Must not have negative loop costs!"
) ? void (0) : __assert_fail ("LoopCost >= 0 && \"Must not have negative loop costs!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3314, __extension__
__PRETTY_FUNCTION__))
;
3315 BBCostMap[BB] = Cost;
3316 }
3317 LLVM_DEBUG(dbgs() << " Total loop cost: " << LoopCost << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Total loop cost: "
<< LoopCost << "\n"; } } while (false)
;
48
Assuming 'DebugFlag' is false
49
Loop condition is false. Exiting loop
3318
3319 // Now we find the best candidate by searching for the one with the following
3320 // properties in order:
3321 //
3322 // 1) An unswitching cost below the threshold
3323 // 2) The smallest number of duplicated unswitch candidates (to avoid
3324 // creating redundant subsequent unswitching)
3325 // 3) The smallest cost after unswitching.
3326 //
3327 // We prioritize reducing fanout of unswitch candidates provided the cost
3328 // remains below the threshold because this has a multiplicative effect.
3329 //
3330 // This requires memoizing each dominator subtree to avoid redundant work.
3331 //
3332 // FIXME: Need to actually do the number of candidates part above.
3333 SmallDenseMap<DomTreeNode *, InstructionCost, 4> DTCostMap;
3334 // Given a terminator which might be unswitched, computes the non-duplicated
3335 // cost for that terminator.
3336 auto ComputeUnswitchedCost = [&](Instruction &TI,
3337 bool FullUnswitch) -> InstructionCost {
3338 BasicBlock &BB = *TI.getParent();
3339 SmallPtrSet<BasicBlock *, 4> Visited;
3340
3341 InstructionCost Cost = 0;
3342 for (BasicBlock *SuccBB : successors(&BB)) {
3343 // Don't count successors more than once.
3344 if (!Visited.insert(SuccBB).second)
55
Assuming field 'second' is true
56
Taking false branch
3345 continue;
3346
3347 // If this is a partial unswitch candidate, then it must be a conditional
3348 // branch with a condition of either `or`, `and`, their corresponding
3349 // select forms or partially invariant instructions. In that case, one of
3350 // the successors is necessarily duplicated, so don't even try to remove
3351 // its cost.
3352 if (!FullUnswitch
56.1
'FullUnswitch' is false
56.1
'FullUnswitch' is false
56.1
'FullUnswitch' is false
) {
57
Taking true branch
3353 auto &BI = cast<BranchInst>(TI);
58
'TI' is a 'BranchInst'
3354 Value *Cond = skipTrivialSelect(BI.getCondition());
3355 if (match(Cond, m_LogicalAnd())) {
59
Assuming the condition is false
60
Taking false branch
3356 if (SuccBB == BI.getSuccessor(1))
3357 continue;
3358 } else if (match(Cond, m_LogicalOr())) {
61
Assuming the condition is false
3359 if (SuccBB == BI.getSuccessor(0))
3360 continue;
3361 } else if ((PartialIVInfo.KnownValue->isOneValue() &&
62
Called C++ object pointer is null
3362 SuccBB == BI.getSuccessor(0)) ||
3363 (!PartialIVInfo.KnownValue->isOneValue() &&
3364 SuccBB == BI.getSuccessor(1)))
3365 continue;
3366 }
3367
3368 // This successor's domtree will not need to be duplicated after
3369 // unswitching if the edge to the successor dominates it (and thus the
3370 // entire tree). This essentially means there is no other path into this
3371 // subtree and so it will end up live in only one clone of the loop.
3372 if (SuccBB->getUniquePredecessor() ||
3373 llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
3374 return PredBB == &BB || DT.dominates(SuccBB, PredBB);
3375 })) {
3376 Cost += computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
3377 assert(Cost <= LoopCost &&(static_cast <bool> (Cost <= LoopCost && "Non-duplicated cost should never exceed total loop cost!"
) ? void (0) : __assert_fail ("Cost <= LoopCost && \"Non-duplicated cost should never exceed total loop cost!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3378, __extension__
__PRETTY_FUNCTION__))
3378 "Non-duplicated cost should never exceed total loop cost!")(static_cast <bool> (Cost <= LoopCost && "Non-duplicated cost should never exceed total loop cost!"
) ? void (0) : __assert_fail ("Cost <= LoopCost && \"Non-duplicated cost should never exceed total loop cost!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3378, __extension__
__PRETTY_FUNCTION__))
;
3379 }
3380 }
3381
3382 // Now scale the cost by the number of unique successors minus one. We
3383 // subtract one because there is already at least one copy of the entire
3384 // loop. This is computing the new cost of unswitching a condition.
3385 // Note that guards always have 2 unique successors that are implicit and
3386 // will be materialized if we decide to unswitch it.
3387 int SuccessorsCount =
3388 isGuard(&TI) || isa<SelectInst>(TI) ? 2 : Visited.size();
3389 assert(SuccessorsCount > 1 &&(static_cast <bool> (SuccessorsCount > 1 && "Cannot unswitch a condition without multiple distinct successors!"
) ? void (0) : __assert_fail ("SuccessorsCount > 1 && \"Cannot unswitch a condition without multiple distinct successors!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3390, __extension__
__PRETTY_FUNCTION__))
3390 "Cannot unswitch a condition without multiple distinct successors!")(static_cast <bool> (SuccessorsCount > 1 && "Cannot unswitch a condition without multiple distinct successors!"
) ? void (0) : __assert_fail ("SuccessorsCount > 1 && \"Cannot unswitch a condition without multiple distinct successors!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3390, __extension__
__PRETTY_FUNCTION__))
;
3391 return (LoopCost - Cost) * (SuccessorsCount - 1);
3392 };
3393
3394 std::optional<NonTrivialUnswitchCandidate> Best;
3395 for (auto &Candidate : UnswitchCandidates) {
50
Assuming '__begin1' is not equal to '__end1'
3396 Instruction &TI = *Candidate.TI;
3397 ArrayRef<Value *> Invariants = Candidate.Invariants;
3398 BranchInst *BI = dyn_cast<BranchInst>(&TI);
51
Assuming the object is a 'CastReturnType'
3399 bool FullUnswitch =
3400 !BI
51.1
'BI' is non-null
51.1
'BI' is non-null
51.1
'BI' is non-null
|| Candidate.hasPendingInjection() ||
52
Assuming the condition is false
3401 (Invariants.size() == 1 &&
53
Assuming the condition is false
3402 Invariants[0] == skipTrivialSelect(BI->getCondition()));
3403 InstructionCost CandidateCost = ComputeUnswitchedCost(TI, FullUnswitch);
54
Calling 'operator()'
3404 // Calculate cost multiplier which is a tool to limit potentially
3405 // exponential behavior of loop-unswitch.
3406 if (EnableUnswitchCostMultiplier) {
3407 int CostMultiplier =
3408 CalculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates);
3409 assert((static_cast <bool> ((CostMultiplier > 0 && CostMultiplier
<= UnswitchThreshold) && "cost multiplier needs to be in the range of 1..UnswitchThreshold"
) ? void (0) : __assert_fail ("(CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) && \"cost multiplier needs to be in the range of 1..UnswitchThreshold\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3411, __extension__
__PRETTY_FUNCTION__))
3410 (CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) &&(static_cast <bool> ((CostMultiplier > 0 && CostMultiplier
<= UnswitchThreshold) && "cost multiplier needs to be in the range of 1..UnswitchThreshold"
) ? void (0) : __assert_fail ("(CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) && \"cost multiplier needs to be in the range of 1..UnswitchThreshold\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3411, __extension__
__PRETTY_FUNCTION__))
3411 "cost multiplier needs to be in the range of 1..UnswitchThreshold")(static_cast <bool> ((CostMultiplier > 0 && CostMultiplier
<= UnswitchThreshold) && "cost multiplier needs to be in the range of 1..UnswitchThreshold"
) ? void (0) : __assert_fail ("(CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) && \"cost multiplier needs to be in the range of 1..UnswitchThreshold\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3411, __extension__
__PRETTY_FUNCTION__))
;
3412 CandidateCost *= CostMultiplier;
3413 LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCostdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Computed cost of "
<< CandidateCost << " (multiplier: " << CostMultiplier
<< ")" << " for unswitch candidate: " << TI
<< "\n"; } } while (false)
3414 << " (multiplier: " << CostMultiplier << ")"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Computed cost of "
<< CandidateCost << " (multiplier: " << CostMultiplier
<< ")" << " for unswitch candidate: " << TI
<< "\n"; } } while (false)
3415 << " for unswitch candidate: " << TI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Computed cost of "
<< CandidateCost << " (multiplier: " << CostMultiplier
<< ")" << " for unswitch candidate: " << TI
<< "\n"; } } while (false)
;
3416 } else {
3417 LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCostdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Computed cost of "
<< CandidateCost << " for unswitch candidate: " <<
TI << "\n"; } } while (false)
3418 << " for unswitch candidate: " << TI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Computed cost of "
<< CandidateCost << " for unswitch candidate: " <<
TI << "\n"; } } while (false)
;
3419 }
3420
3421 if (!Best || CandidateCost < Best->Cost) {
3422 Best = Candidate;
3423 Best->Cost = CandidateCost;
3424 }
3425 }
3426 assert(Best && "Must be!")(static_cast <bool> (Best && "Must be!") ? void
(0) : __assert_fail ("Best && \"Must be!\"", "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp"
, 3426, __extension__ __PRETTY_FUNCTION__))
;
3427 return *Best;
3428}
3429
3430// Insert a freeze on an unswitched branch if all is true:
3431// 1. freeze-loop-unswitch-cond option is true
3432// 2. The branch may not execute in the loop pre-transformation. If a branch may
3433// not execute and could cause UB, it would always cause UB if it is hoisted outside
3434// of the loop. Insert a freeze to prevent this case.
3435// 3. The branch condition may be poison or undef
3436static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT,
3437 AssumptionCache &AC) {
3438 assert(isa<BranchInst>(TI) || isa<SwitchInst>(TI))(static_cast <bool> (isa<BranchInst>(TI) || isa<
SwitchInst>(TI)) ? void (0) : __assert_fail ("isa<BranchInst>(TI) || isa<SwitchInst>(TI)"
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3438, __extension__
__PRETTY_FUNCTION__))
;
3439 if (!FreezeLoopUnswitchCond)
3440 return false;
3441
3442 ICFLoopSafetyInfo SafetyInfo;
3443 SafetyInfo.computeLoopSafetyInfo(&L);
3444 if (SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
3445 return false;
3446
3447 Value *Cond;
3448 if (BranchInst *BI = dyn_cast<BranchInst>(&TI))
3449 Cond = skipTrivialSelect(BI->getCondition());
3450 else
3451 Cond = skipTrivialSelect(cast<SwitchInst>(&TI)->getCondition());
3452 return !isGuaranteedNotToBeUndefOrPoison(
3453 Cond, &AC, L.getLoopPreheader()->getTerminator(), &DT);
3454}
3455
3456static bool unswitchBestCondition(
3457 Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
3458 AAResults &AA, TargetTransformInfo &TTI,
3459 function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
3460 ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
3461 function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
3462 // Collect all invariant conditions within this loop (as opposed to an inner
3463 // loop which would be handled when visiting that inner loop).
3464 SmallVector<NonTrivialUnswitchCandidate, 4> UnswitchCandidates;
3465 IVConditionInfo PartialIVInfo;
19
Calling implicit default constructor for 'IVConditionInfo'
21
Returning from default constructor for 'IVConditionInfo'
3466 Instruction *PartialIVCondBranch = nullptr;
3467 collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo,
22
Calling 'collectUnswitchCandidates'
28
Returning from 'collectUnswitchCandidates'
3468 PartialIVCondBranch, L, LI, AA, MSSAU);
3469 collectUnswitchCandidatesWithInjections(UnswitchCandidates, PartialIVInfo,
29
Calling 'collectUnswitchCandidatesWithInjections'
40
Returning from 'collectUnswitchCandidatesWithInjections'
3470 PartialIVCondBranch, L, DT, LI, AA,
3471 MSSAU);
3472 // If we didn't find any candidates, we're done.
3473 if (UnswitchCandidates.empty())
3474 return false;
3475
3476 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Considering " <<
UnswitchCandidates.size() << " non-trivial loop invariant conditions for unswitching.\n"
; } } while (false)
41
Taking false branch
42
Assuming 'DebugFlag' is false
43
Loop condition is false. Exiting loop
3477 dbgs() << "Considering " << UnswitchCandidates.size()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Considering " <<
UnswitchCandidates.size() << " non-trivial loop invariant conditions for unswitching.\n"
; } } while (false)
3478 << " non-trivial loop invariant conditions for unswitching.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Considering " <<
UnswitchCandidates.size() << " non-trivial loop invariant conditions for unswitching.\n"
; } } while (false)
;
3479
3480 NonTrivialUnswitchCandidate Best = findBestNonTrivialUnswitchCandidate(
44
Calling 'findBestNonTrivialUnswitchCandidate'
3481 UnswitchCandidates, L, DT, LI, AC, TTI, PartialIVInfo);
3482
3483 assert(Best.TI && "Failed to find loop unswitch candidate")(static_cast <bool> (Best.TI && "Failed to find loop unswitch candidate"
) ? void (0) : __assert_fail ("Best.TI && \"Failed to find loop unswitch candidate\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3483, __extension__
__PRETTY_FUNCTION__))
;
3484 assert(Best.Cost && "Failed to compute cost")(static_cast <bool> (Best.Cost && "Failed to compute cost"
) ? void (0) : __assert_fail ("Best.Cost && \"Failed to compute cost\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3484, __extension__
__PRETTY_FUNCTION__))
;
3485
3486 if (*Best.Cost >= UnswitchThreshold) {
3487 LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: " << *Best.Costdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Cannot unswitch, lowest cost found: "
<< *Best.Cost << "\n"; } } while (false)
3488 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Cannot unswitch, lowest cost found: "
<< *Best.Cost << "\n"; } } while (false)
;
3489 return false;
3490 }
3491
3492 if (Best.hasPendingInjection())
3493 Best = injectPendingInvariantConditions(Best, L, DT, LI, AC, MSSAU);
3494 assert(!Best.hasPendingInjection() &&(static_cast <bool> (!Best.hasPendingInjection() &&
"All injections should have been done by now!") ? void (0) :
__assert_fail ("!Best.hasPendingInjection() && \"All injections should have been done by now!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3495, __extension__
__PRETTY_FUNCTION__))
3495 "All injections should have been done by now!")(static_cast <bool> (!Best.hasPendingInjection() &&
"All injections should have been done by now!") ? void (0) :
__assert_fail ("!Best.hasPendingInjection() && \"All injections should have been done by now!\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3495, __extension__
__PRETTY_FUNCTION__))
;
3496
3497 if (Best.TI != PartialIVCondBranch)
3498 PartialIVInfo.InstToDuplicate.clear();
3499
3500 bool InsertFreeze;
3501 if (auto *SI = dyn_cast<SelectInst>(Best.TI)) {
3502 // If the best candidate is a select, turn it into a branch. Select
3503 // instructions with a poison conditional do not propagate poison, but
3504 // branching on poison causes UB. Insert a freeze on the select
3505 // conditional to prevent UB after turning the select into a branch.
3506 InsertFreeze = !isGuaranteedNotToBeUndefOrPoison(
3507 SI->getCondition(), &AC, L.getLoopPreheader()->getTerminator(), &DT);
3508 Best.TI = turnSelectIntoBranch(SI, DT, LI, MSSAU, &AC);
3509 } else {
3510 // If the best candidate is a guard, turn it into a branch.
3511 if (isGuard(Best.TI))
3512 Best.TI =
3513 turnGuardIntoBranch(cast<IntrinsicInst>(Best.TI), L, DT, LI, MSSAU);
3514 InsertFreeze = shouldInsertFreeze(L, *Best.TI, DT, AC);
3515 }
3516
3517 LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = " << Best.Costdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Unswitching non-trivial (cost = "
<< Best.Cost << ") terminator: " << *Best.
TI << "\n"; } } while (false)
3518 << ") terminator: " << *Best.TI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Unswitching non-trivial (cost = "
<< Best.Cost << ") terminator: " << *Best.
TI << "\n"; } } while (false)
;
3519 unswitchNontrivialInvariants(L, *Best.TI, Best.Invariants, PartialIVInfo, DT,
3520 LI, AC, UnswitchCB, SE, MSSAU, DestroyLoopCB,
3521 InsertFreeze);
3522 return true;
3523}
3524
3525/// Unswitch control flow predicated on loop invariant conditions.
3526///
3527/// This first hoists all branches or switches which are trivial (IE, do not
3528/// require duplicating any part of the loop) out of the loop body. It then
3529/// looks at other loop invariant control flows and tries to unswitch those as
3530/// well by cloning the loop if the result is small enough.
3531///
3532/// The `DT`, `LI`, `AC`, `AA`, `TTI` parameters are required analyses that are
3533/// also updated based on the unswitch. The `MSSA` analysis is also updated if
3534/// valid (i.e. its use is enabled).
3535///
3536/// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is
3537/// true, we will attempt to do non-trivial unswitching as well as trivial
3538/// unswitching.
3539///
3540/// The `UnswitchCB` callback provided will be run after unswitching is
3541/// complete, with the first parameter set to `true` if the provided loop
3542/// remains a loop, and a list of new sibling loops created.
3543///
3544/// If `SE` is non-null, we will update that analysis based on the unswitching
3545/// done.
3546static bool
3547unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
3548 AAResults &AA, TargetTransformInfo &TTI, bool Trivial,
3549 bool NonTrivial,
3550 function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
3551 ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
3552 ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
3553 function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
3554 assert(L.isRecursivelyLCSSAForm(DT, LI) &&(static_cast <bool> (L.isRecursivelyLCSSAForm(DT, LI) &&
"Loops must be in LCSSA form before unswitching.") ? void (0
) : __assert_fail ("L.isRecursivelyLCSSAForm(DT, LI) && \"Loops must be in LCSSA form before unswitching.\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3555, __extension__
__PRETTY_FUNCTION__))
8
Assuming the condition is true
9
'?' condition is true
3555 "Loops must be in LCSSA form before unswitching.")(static_cast <bool> (L.isRecursivelyLCSSAForm(DT, LI) &&
"Loops must be in LCSSA form before unswitching.") ? void (0
) : __assert_fail ("L.isRecursivelyLCSSAForm(DT, LI) && \"Loops must be in LCSSA form before unswitching.\""
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3555, __extension__
__PRETTY_FUNCTION__))
;
3556
3557 // Must be in loop simplified form: we need a preheader and dedicated exits.
3558 if (!L.isLoopSimplifyForm())
10
Assuming the condition is false
3559 return false;
3560
3561 // Try trivial unswitch first before loop over other basic blocks in the loop.
3562 if (Trivial && unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) {
11
Assuming 'Trivial' is false
3563 // If we unswitched successfully we will want to clean up the loop before
3564 // processing it further so just mark it as unswitched and return.
3565 UnswitchCB(/*CurrentLoopValid*/ true, false, {});
3566 return true;
3567 }
3568
3569 // Check whether we should continue with non-trivial conditions.
3570 // EnableNonTrivialUnswitch: Global variable that forces non-trivial
3571 // unswitching for testing and debugging.
3572 // NonTrivial: Parameter that enables non-trivial unswitching for this
3573 // invocation of the transform. But this should be allowed only
3574 // for targets without branch divergence.
3575 //
3576 // FIXME: If divergence analysis becomes available to a loop
3577 // transform, we should allow unswitching for non-trivial uniform
3578 // branches even on targets that have divergence.
3579 // https://bugs.llvm.org/show_bug.cgi?id=48819
3580 bool ContinueWithNonTrivial =
3581 EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence());
12
Assuming the condition is true
3582 if (!ContinueWithNonTrivial
12.1
'ContinueWithNonTrivial' is true
12.1
'ContinueWithNonTrivial' is true
12.1
'ContinueWithNonTrivial' is true
)
13
Taking false branch
3583 return false;
3584
3585 // Skip non-trivial unswitching for optsize functions.
3586 if (L.getHeader()->getParent()->hasOptSize())
14
Assuming the condition is false
15
Taking false branch
3587 return false;
3588
3589 // Returns true if Loop L's loop nest is cold, i.e. if the headers of L,
3590 // of the loops L is nested in, and of the loops nested in L are all cold.
3591 auto IsLoopNestCold = [&](const Loop *L) {
3592 // Check L and all of its parent loops.
3593 auto *Parent = L;
3594 while (Parent) {
3595 if (!PSI->isColdBlock(Parent->getHeader(), BFI))
3596 return false;
3597 Parent = Parent->getParentLoop();
3598 }
3599 // Next check all loops nested within L.
3600 SmallVector<const Loop *, 4> Worklist;
3601 Worklist.insert(Worklist.end(), L->getSubLoops().begin(),
3602 L->getSubLoops().end());
3603 while (!Worklist.empty()) {
3604 auto *CurLoop = Worklist.pop_back_val();
3605 if (!PSI->isColdBlock(CurLoop->getHeader(), BFI))
3606 return false;
3607 Worklist.insert(Worklist.end(), CurLoop->getSubLoops().begin(),
3608 CurLoop->getSubLoops().end());
3609 }
3610 return true;
3611 };
3612
3613 // Skip cold loops in cold loop nests, as unswitching them brings little
3614 // benefit but increases the code size
3615 if (PSI
15.1
'PSI' is null
15.1
'PSI' is null
15.1
'PSI' is null
&& PSI->hasProfileSummary() && BFI && IsLoopNestCold(&L)) {
3616 LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << " Skip cold loop: "
<< L << "\n"; } } while (false)
;
3617 return false;
3618 }
3619
3620 // Perform legality checks.
3621 if (!isSafeForNoNTrivialUnswitching(L, LI))
16
Assuming the condition is false
17
Taking false branch
3622 return false;
3623
3624 // For non-trivial unswitching, because it often creates new loops, we rely on
3625 // the pass manager to iterate on the loops rather than trying to immediately
3626 // reach a fixed point. There is no substantial advantage to iterating
3627 // internally, and if any of the new loops are simplified enough to contain
3628 // trivial unswitching we want to prefer those.
3629
3630 // Try to unswitch the best invariant condition. We prefer this full unswitch to
3631 // a partial unswitch when possible below the threshold.
3632 if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, UnswitchCB, SE, MSSAU,
18
Calling 'unswitchBestCondition'
3633 DestroyLoopCB))
3634 return true;
3635
3636 // No other opportunities to unswitch.
3637 return false;
3638}
3639
3640PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
3641 LoopStandardAnalysisResults &AR,
3642 LPMUpdater &U) {
3643 Function &F = *L.getHeader()->getParent();
3644 (void)F;
3645 ProfileSummaryInfo *PSI = nullptr;
3646 if (auto OuterProxy
0.1
'OuterProxy' is null
0.1
'OuterProxy' is null
0.1
'OuterProxy' is null
=
3647 AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR)
3648 .getCachedResult<ModuleAnalysisManagerFunctionProxy>(F))
3649 PSI = OuterProxy->getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
3650 LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << Ldo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Unswitching loop in "
<< F.getName() << ": " << L << "\n";
} } while (false)
1
Taking false branch
2
Assuming 'DebugFlag' is false
3
Loop condition is false. Exiting loop
3651 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Unswitching loop in "
<< F.getName() << ": " << L << "\n";
} } while (false)
;
3652
3653 // Save the current loop name in a variable so that we can report it even
3654 // after it has been deleted.
3655 std::string LoopName = std::string(L.getName());
3656
3657 auto UnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid,
3658 bool PartiallyInvariant,
3659 ArrayRef<Loop *> NewLoops) {
3660 // If we did a non-trivial unswitch, we have added new (cloned) loops.
3661 if (!NewLoops.empty())
3662 U.addSiblingLoops(NewLoops);
3663
3664 // If the current loop remains valid, we should revisit it to catch any
3665 // other unswitch opportunities. Otherwise, we need to mark it as deleted.
3666 if (CurrentLoopValid) {
3667 if (PartiallyInvariant) {
3668 // Mark the new loop as partially unswitched, to avoid unswitching on
3669 // the same condition again.
3670 auto &Context = L.getHeader()->getContext();
3671 MDNode *DisableUnswitchMD = MDNode::get(
3672 Context,
3673 MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
3674 MDNode *NewLoopID = makePostTransformationMetadata(
3675 Context, L.getLoopID(), {"llvm.loop.unswitch.partial"},
3676 {DisableUnswitchMD});
3677 L.setLoopID(NewLoopID);
3678 } else
3679 U.revisitCurrentLoop();
3680 } else
3681 U.markLoopAsDeleted(L, LoopName);
3682 };
3683
3684 auto DestroyLoopCB = [&U](Loop &L, StringRef Name) {
3685 U.markLoopAsDeleted(L, Name);
3686 };
3687
3688 std::optional<MemorySSAUpdater> MSSAU;
3689 if (AR.MSSA) {
4
Assuming field 'MSSA' is null
5
Taking false branch
3690 MSSAU = MemorySSAUpdater(AR.MSSA);
3691 if (VerifyMemorySSA)
3692 AR.MSSA->verifyMemorySSA();
3693 }
3694 if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
7
Calling 'unswitchLoop'
3695 UnswitchCB, &AR.SE, MSSAU ? &*MSSAU : nullptr, PSI, AR.BFI,
6
'?' condition is false
3696 DestroyLoopCB))
3697 return PreservedAnalyses::all();
3698
3699 if (AR.MSSA && VerifyMemorySSA)
3700 AR.MSSA->verifyMemorySSA();
3701
3702 // Historically this pass has had issues with the dominator tree so verify it
3703 // in asserts builds.
3704 assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast))(static_cast <bool> (AR.DT.verify(DominatorTree::VerificationLevel
::Fast)) ? void (0) : __assert_fail ("AR.DT.verify(DominatorTree::VerificationLevel::Fast)"
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3704, __extension__
__PRETTY_FUNCTION__))
;
3705
3706 auto PA = getLoopPassPreservedAnalyses();
3707 if (AR.MSSA)
3708 PA.preserve<MemorySSAAnalysis>();
3709 return PA;
3710}
3711
3712void SimpleLoopUnswitchPass::printPipeline(
3713 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
3714 static_cast<PassInfoMixin<SimpleLoopUnswitchPass> *>(this)->printPipeline(
3715 OS, MapClassName2PassName);
3716
3717 OS << '<';
3718 OS << (NonTrivial ? "" : "no-") << "nontrivial;";
3719 OS << (Trivial ? "" : "no-") << "trivial";
3720 OS << '>';
3721}
3722
3723namespace {
3724
3725class SimpleLoopUnswitchLegacyPass : public LoopPass {
3726 bool NonTrivial;
3727
3728public:
3729 static char ID; // Pass ID, replacement for typeid
3730
3731 explicit SimpleLoopUnswitchLegacyPass(bool NonTrivial = false)
3732 : LoopPass(ID), NonTrivial(NonTrivial) {
3733 initializeSimpleLoopUnswitchLegacyPassPass(
3734 *PassRegistry::getPassRegistry());
3735 }
3736
3737 bool runOnLoop(Loop *L, LPPassManager &LPM) override;
3738
3739 void getAnalysisUsage(AnalysisUsage &AU) const override {
3740 AU.addRequired<AssumptionCacheTracker>();
3741 AU.addRequired<TargetTransformInfoWrapperPass>();
3742 AU.addRequired<MemorySSAWrapperPass>();
3743 AU.addPreserved<MemorySSAWrapperPass>();
3744 getLoopAnalysisUsage(AU);
3745 }
3746};
3747
3748} // end anonymous namespace
3749
3750bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
3751 if (skipLoop(L))
3752 return false;
3753
3754 Function &F = *L->getHeader()->getParent();
3755
3756 LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *Ldo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Unswitching loop in "
<< F.getName() << ": " << *L << "\n"
; } } while (false)
3757 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("simple-loop-unswitch")) { dbgs() << "Unswitching loop in "
<< F.getName() << ": " << *L << "\n"
; } } while (false)
;
3758 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
3759 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
3760 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
3761 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
3762 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
3763 MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
3764 MemorySSAUpdater MSSAU(MSSA);
3765
3766 auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
3767 auto *SE = SEWP ? &SEWP->getSE() : nullptr;
3768
3769 auto UnswitchCB = [&L, &LPM](bool CurrentLoopValid, bool PartiallyInvariant,
3770 ArrayRef<Loop *> NewLoops) {
3771 // If we did a non-trivial unswitch, we have added new (cloned) loops.
3772 for (auto *NewL : NewLoops)
3773 LPM.addLoop(*NewL);
3774
3775 // If the current loop remains valid, re-add it to the queue. This is
3776 // a little wasteful as we'll finish processing the current loop as well,
3777 // but it is the best we can do in the old PM.
3778 if (CurrentLoopValid) {
3779 // If the current loop has been unswitched using a partially invariant
3780 // condition, we should not re-add the current loop to avoid unswitching
3781 // on the same condition again.
3782 if (!PartiallyInvariant)
3783 LPM.addLoop(*L);
3784 } else
3785 LPM.markLoopAsDeleted(*L);
3786 };
3787
3788 auto DestroyLoopCB = [&LPM](Loop &L, StringRef /* Name */) {
3789 LPM.markLoopAsDeleted(L);
3790 };
3791
3792 if (VerifyMemorySSA)
3793 MSSA->verifyMemorySSA();
3794 bool Changed =
3795 unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial, UnswitchCB, SE,
3796 &MSSAU, nullptr, nullptr, DestroyLoopCB);
3797
3798 if (VerifyMemorySSA)
3799 MSSA->verifyMemorySSA();
3800
3801 // Historically this pass has had issues with the dominator tree so verify it
3802 // in asserts builds.
3803 assert(DT.verify(DominatorTree::VerificationLevel::Fast))(static_cast <bool> (DT.verify(DominatorTree::VerificationLevel
::Fast)) ? void (0) : __assert_fail ("DT.verify(DominatorTree::VerificationLevel::Fast)"
, "llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp", 3803, __extension__
__PRETTY_FUNCTION__))
;
3804
3805 return Changed;
3806}
3807
3808char SimpleLoopUnswitchLegacyPass::ID = 0;
3809INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",static void *initializeSimpleLoopUnswitchLegacyPassPassOnce(PassRegistry
&Registry) {
3810 "Simple unswitch loops", false, false)static void *initializeSimpleLoopUnswitchLegacyPassPassOnce(PassRegistry
&Registry) {
3811INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)initializeAssumptionCacheTrackerPass(Registry);
3812INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry);
3813INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)initializeLoopInfoWrapperPassPass(Registry);
3814INITIALIZE_PASS_DEPENDENCY(LoopPass)initializeLoopPassPass(Registry);
3815INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)initializeMemorySSAWrapperPassPass(Registry);
3816INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)initializeTargetTransformInfoWrapperPassPass(Registry);
3817INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",PassInfo *PI = new PassInfo( "Simple unswitch loops", "simple-loop-unswitch"
, &SimpleLoopUnswitchLegacyPass::ID, PassInfo::NormalCtor_t
(callDefaultCtor<SimpleLoopUnswitchLegacyPass>), false,
false); Registry.registerPass(*PI, true); return PI; } static
llvm::once_flag InitializeSimpleLoopUnswitchLegacyPassPassFlag
; void llvm::initializeSimpleLoopUnswitchLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeSimpleLoopUnswitchLegacyPassPassFlag
, initializeSimpleLoopUnswitchLegacyPassPassOnce, std::ref(Registry
)); }
3818 "Simple unswitch loops", false, false)PassInfo *PI = new PassInfo( "Simple unswitch loops", "simple-loop-unswitch"
, &SimpleLoopUnswitchLegacyPass::ID, PassInfo::NormalCtor_t
(callDefaultCtor<SimpleLoopUnswitchLegacyPass>), false,
false); Registry.registerPass(*PI, true); return PI; } static
llvm::once_flag InitializeSimpleLoopUnswitchLegacyPassPassFlag
; void llvm::initializeSimpleLoopUnswitchLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeSimpleLoopUnswitchLegacyPassPassFlag
, initializeSimpleLoopUnswitchLegacyPassPassOnce, std::ref(Registry
)); }
3819
3820Pass *llvm::createSimpleLoopUnswitchLegacyPass(bool NonTrivial) {
3821 return new SimpleLoopUnswitchLegacyPass(NonTrivial);
3822}

/usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/bits/basic_string.h

1// Components for manipulating sequences of characters -*- C++ -*-
2
3// Copyright (C) 1997-2020 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25/** @file bits/basic_string.h
26 * This is an internal header file, included by other library headers.
27 * Do not attempt to use it directly. @headername{string}
28 */
29
30//
31// ISO C++ 14882: 21 Strings library
32//
33
34#ifndef _BASIC_STRING_H1
35#define _BASIC_STRING_H1 1
36
37#pragma GCC system_header
38
39#include <ext/atomicity.h>
40#include <ext/alloc_traits.h>
41#include <debug/debug.h>
42
43#if __cplusplus201703L >= 201103L
44#include <initializer_list>
45#endif
46
47#if __cplusplus201703L >= 201703L
48# include <string_view>
49#endif
50
51
52namespace std _GLIBCXX_VISIBILITY(default)__attribute__ ((__visibility__ ("default")))
53{
54_GLIBCXX_BEGIN_NAMESPACE_VERSION
55
56#if _GLIBCXX_USE_CXX11_ABI1
57_GLIBCXX_BEGIN_NAMESPACE_CXX11namespace __cxx11 {
58 /**
59 * @class basic_string basic_string.h <string>
60 * @brief Managing sequences of characters and character-like objects.
61 *
62 * @ingroup strings
63 * @ingroup sequences
64 *
65 * @tparam _CharT Type of character
66 * @tparam _Traits Traits for character type, defaults to
67 * char_traits<_CharT>.
68 * @tparam _Alloc Allocator type, defaults to allocator<_CharT>.
69 *
70 * Meets the requirements of a <a href="tables.html#65">container</a>, a
71 * <a href="tables.html#66">reversible container</a>, and a
72 * <a href="tables.html#67">sequence</a>. Of the
73 * <a href="tables.html#68">optional sequence requirements</a>, only
74 * @c push_back, @c at, and @c %array access are supported.
75 */
76 template<typename _CharT, typename _Traits, typename _Alloc>
77 class basic_string
78 {
79 typedef typename __gnu_cxx::__alloc_traits<_Alloc>::template
80 rebind<_CharT>::other _Char_alloc_type;
81 typedef __gnu_cxx::__alloc_traits<_Char_alloc_type> _Alloc_traits;
82
83 // Types:
84 public:
85 typedef _Traits traits_type;
86 typedef typename _Traits::char_type value_type;
87 typedef _Char_alloc_type allocator_type;
88 typedef typename _Alloc_traits::size_type size_type;
89 typedef typename _Alloc_traits::difference_type difference_type;
90 typedef typename _Alloc_traits::reference reference;
91 typedef typename _Alloc_traits::const_reference const_reference;
92 typedef typename _Alloc_traits::pointer pointer;
93 typedef typename _Alloc_traits::const_pointer const_pointer;
94 typedef __gnu_cxx::__normal_iterator<pointer, basic_string> iterator;
95 typedef __gnu_cxx::__normal_iterator<const_pointer, basic_string>
96 const_iterator;
97 typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
98 typedef std::reverse_iterator<iterator> reverse_iterator;
99
100 /// Value returned by various member functions when they fail.
101 static const size_type npos = static_cast<size_type>(-1);
102
103 protected:
104 // type used for positions in insert, erase etc.
105#if __cplusplus201703L < 201103L
106 typedef iterator __const_iterator;
107#else
108 typedef const_iterator __const_iterator;
109#endif
110
111 private:
112#if __cplusplus201703L >= 201703L
113 // A helper type for avoiding boiler-plate.
114 typedef basic_string_view<_CharT, _Traits> __sv_type;
115
116 template<typename _Tp, typename _Res>
117 using _If_sv = enable_if_t<
118 __and_<is_convertible<const _Tp&, __sv_type>,
119 __not_<is_convertible<const _Tp*, const basic_string*>>,
120 __not_<is_convertible<const _Tp&, const _CharT*>>>::value,
121 _Res>;
122
123 // Allows an implicit conversion to __sv_type.
124 static __sv_type
125 _S_to_string_view(__sv_type __svt) noexcept
126 { return __svt; }
127
128 // Wraps a string_view by explicit conversion and thus
129 // allows to add an internal constructor that does not
130 // participate in overload resolution when a string_view
131 // is provided.
132 struct __sv_wrapper
133 {
134 explicit __sv_wrapper(__sv_type __sv) noexcept : _M_sv(__sv) { }
135 __sv_type _M_sv;
136 };
137
138 /**
139 * @brief Only internally used: Construct string from a string view
140 * wrapper.
141 * @param __svw string view wrapper.
142 * @param __a Allocator to use.
143 */
144 explicit
145 basic_string(__sv_wrapper __svw, const _Alloc& __a)
146 : basic_string(__svw._M_sv.data(), __svw._M_sv.size(), __a) { }
147#endif
148
149 // Use empty-base optimization: http://www.cantrip.org/emptyopt.html
150 struct _Alloc_hider : allocator_type // TODO check __is_final
151 {
152#if __cplusplus201703L < 201103L
153 _Alloc_hider(pointer __dat, const _Alloc& __a = _Alloc())
154 : allocator_type(__a), _M_p(__dat) { }
155#else
156 _Alloc_hider(pointer __dat, const _Alloc& __a)
157 : allocator_type(__a), _M_p(__dat) { }
158
159 _Alloc_hider(pointer __dat, _Alloc&& __a = _Alloc())
160 : allocator_type(std::move(__a)), _M_p(__dat) { }
161#endif
162
163 pointer _M_p; // The actual data.
164 };
165
166 _Alloc_hider _M_dataplus;
167 size_type _M_string_length;
168
169 enum { _S_local_capacity = 15 / sizeof(_CharT) };
170
171 union
172 {
173 _CharT _M_local_buf[_S_local_capacity + 1];
174 size_type _M_allocated_capacity;
175 };
176
177 void
178 _M_data(pointer __p)
179 { _M_dataplus._M_p = __p; }
180
181 void
182 _M_length(size_type __length)
183 { _M_string_length = __length; }
184
185 pointer
186 _M_data() const
187 { return _M_dataplus._M_p; }
188
189 pointer
190 _M_local_data()
191 {
192#if __cplusplus201703L >= 201103L
193 return std::pointer_traits<pointer>::pointer_to(*_M_local_buf);
194#else
195 return pointer(_M_local_buf);
196#endif
197 }
198
199 const_pointer
200 _M_local_data() const
201 {
202#if __cplusplus201703L >= 201103L
203 return std::pointer_traits<const_pointer>::pointer_to(*_M_local_buf);
204#else
205 return const_pointer(_M_local_buf);
206#endif
207 }
208
209 void
210 _M_capacity(size_type __capacity)
211 { _M_allocated_capacity = __capacity; }
212
213 void
214 _M_set_length(size_type __n)
215 {
216 _M_length(__n);
217 traits_type::assign(_M_data()[__n], _CharT());
218 }
219
220 bool
221 _M_is_local() const
222 { return _M_data() == _M_local_data(); }
223
224 // Create & Destroy
225 pointer
226 _M_create(size_type&, size_type);
227
228 void
229 _M_dispose()
230 {
231 if (!_M_is_local())
232 _M_destroy(_M_allocated_capacity);
233 }
234
235 void
236 _M_destroy(size_type __size) throw()
237 { _Alloc_traits::deallocate(_M_get_allocator(), _M_data(), __size + 1); }
238
239 // _M_construct_aux is used to implement the 21.3.1 para 15 which
240 // requires special behaviour if _InIterator is an integral type
241 template<typename _InIterator>
242 void
243 _M_construct_aux(_InIterator __beg, _InIterator __end,
244 std::__false_type)
245 {
246 typedef typename iterator_traits<_InIterator>::iterator_category _Tag;
247 _M_construct(__beg, __end, _Tag());
248 }
249
250 // _GLIBCXX_RESOLVE_LIB_DEFECTS
251 // 438. Ambiguity in the "do the right thing" clause
252 template<typename _Integer>
253 void
254 _M_construct_aux(_Integer __beg, _Integer __end, std::__true_type)
255 { _M_construct_aux_2(static_cast<size_type>(__beg), __end); }
256
257 void
258 _M_construct_aux_2(size_type __req, _CharT __c)
259 { _M_construct(__req, __c); }
260
261 template<typename _InIterator>
262 void
263 _M_construct(_InIterator __beg, _InIterator __end)
264 {
265 typedef typename std::__is_integer<_InIterator>::__type _Integral;
266 _M_construct_aux(__beg, __end, _Integral());
267 }
268
269 // For Input Iterators, used in istreambuf_iterators, etc.
270 template<typename _InIterator>
271 void
272 _M_construct(_InIterator __beg, _InIterator __end,
273 std::input_iterator_tag);
274
275 // For forward_iterators up to random_access_iterators, used for
276 // string::iterator, _CharT*, etc.
277 template<typename _FwdIterator>
278 void
279 _M_construct(_FwdIterator __beg, _FwdIterator __end,
280 std::forward_iterator_tag);
281
282 void
283 _M_construct(size_type __req, _CharT __c);
284
285 allocator_type&
286 _M_get_allocator()
287 { return _M_dataplus; }
288
289 const allocator_type&
290 _M_get_allocator() const
291 { return _M_dataplus; }
292
293 private:
294
295#ifdef _GLIBCXX_DISAMBIGUATE_REPLACE_INST
296 // The explicit instantiations in misc-inst.cc require this due to
297 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64063
298 template<typename _Tp, bool _Requires =
299 !__are_same<_Tp, _CharT*>::__value
300 && !__are_same<_Tp, const _CharT*>::__value
301 && !__are_same<_Tp, iterator>::__value
302 && !__are_same<_Tp, const_iterator>::__value>
303 struct __enable_if_not_native_iterator
304 { typedef basic_string& __type; };
305 template<typename _Tp>
306 struct __enable_if_not_native_iterator<_Tp, false> { };
307#endif
308
309 size_type
310 _M_check(size_type __pos, const char* __s) const
311 {
312 if (__pos > this->size())
313 __throw_out_of_range_fmt(__N("%s: __pos (which is %zu) > "("%s: __pos (which is %zu) > " "this->size() (which is %zu)"
)
314 "this->size() (which is %zu)")("%s: __pos (which is %zu) > " "this->size() (which is %zu)"
)
,
315 __s, __pos, this->size());
316 return __pos;
317 }
318
319 void
320 _M_check_length(size_type __n1, size_type __n2, const char* __s) const
321 {
322 if (this->max_size() - (this->size() - __n1) < __n2)
323 __throw_length_error(__N(__s)(__s));
324 }
325
326
327 // NB: _M_limit doesn't check for a bad __pos value.
328 size_type
329 _M_limit(size_type __pos, size_type __off) const _GLIBCXX_NOEXCEPTnoexcept
330 {
331 const bool __testoff = __off < this->size() - __pos;
332 return __testoff ? __off : this->size() - __pos;
333 }
334
335 // True if _Rep and source do not overlap.
336 bool
337 _M_disjunct(const _CharT* __s) const _GLIBCXX_NOEXCEPTnoexcept
338 {
339 return (less<const _CharT*>()(__s, _M_data())
340 || less<const _CharT*>()(_M_data() + this->size(), __s));
341 }
342
343 // When __n = 1 way faster than the general multichar
344 // traits_type::copy/move/assign.
345 static void
346 _S_copy(_CharT* __d, const _CharT* __s, size_type __n)
347 {
348 if (__n == 1)
349 traits_type::assign(*__d, *__s);
350 else
351 traits_type::copy(__d, __s, __n);
352 }
353
354 static void
355 _S_move(_CharT* __d, const _CharT* __s, size_type __n)
356 {
357 if (__n == 1)
358 traits_type::assign(*__d, *__s);
359 else
360 traits_type::move(__d, __s, __n);
361 }
362
363 static void
364 _S_assign(_CharT* __d, size_type __n, _CharT __c)
365 {
366 if (__n == 1)
367 traits_type::assign(*__d, __c);
368 else
369 traits_type::assign(__d, __n, __c);
370 }
371
372 // _S_copy_chars is a separate template to permit specialization
373 // to optimize for the common case of pointers as iterators.
374 template<class _Iterator>
375 static void
376 _S_copy_chars(_CharT* __p, _Iterator __k1, _Iterator __k2)
377 {
378 for (; __k1 != __k2; ++__k1, (void)++__p)
379 traits_type::assign(*__p, *__k1); // These types are off.
380 }
381
382 static void
383 _S_copy_chars(_CharT* __p, iterator __k1, iterator __k2) _GLIBCXX_NOEXCEPTnoexcept
384 { _S_copy_chars(__p, __k1.base(), __k2.base()); }
385
386 static void
387 _S_copy_chars(_CharT* __p, const_iterator __k1, const_iterator __k2)
388 _GLIBCXX_NOEXCEPTnoexcept
389 { _S_copy_chars(__p, __k1.base(), __k2.base()); }
390
391 static void
392 _S_copy_chars(_CharT* __p, _CharT* __k1, _CharT* __k2) _GLIBCXX_NOEXCEPTnoexcept
393 { _S_copy(__p, __k1, __k2 - __k1); }
394
395 static void
396 _S_copy_chars(_CharT* __p, const _CharT* __k1, const _CharT* __k2)
397 _GLIBCXX_NOEXCEPTnoexcept
398 { _S_copy(__p, __k1, __k2 - __k1); }
399
400 static int
401 _S_compare(size_type __n1, size_type __n2) _GLIBCXX_NOEXCEPTnoexcept
402 {
403 const difference_type __d = difference_type(__n1 - __n2);
404
405 if (__d > __gnu_cxx::__numeric_traits<int>::__max)
406 return __gnu_cxx::__numeric_traits<int>::__max;
407 else if (__d < __gnu_cxx::__numeric_traits<int>::__min)
408 return __gnu_cxx::__numeric_traits<int>::__min;
409 else
410 return int(__d);
411 }
412
413 void
414 _M_assign(const basic_string&);
415
416 void
417 _M_mutate(size_type __pos, size_type __len1, const _CharT* __s,
418 size_type __len2);
419
420 void
421 _M_erase(size_type __pos, size_type __n);
422
423 public:
424 // Construct/copy/destroy:
425 // NB: We overload ctors in some cases instead of using default
426 // arguments, per 17.4.4.4 para. 2 item 2.
427
428 /**
429 * @brief Default constructor creates an empty string.
430 */
431 basic_string()
432 _GLIBCXX_NOEXCEPT_IF(is_nothrow_default_constructible<_Alloc>::value)noexcept(is_nothrow_default_constructible<_Alloc>::value
)
433 : _M_dataplus(_M_local_data())
434 { _M_set_length(0); }
435
436 /**
437 * @brief Construct an empty string using allocator @a a.
438 */
439 explicit
440 basic_string(const _Alloc& __a) _GLIBCXX_NOEXCEPTnoexcept
441 : _M_dataplus(_M_local_data(), __a)
442 { _M_set_length(0); }
443
444 /**
445 * @brief Construct string with copy of value of @a __str.
446 * @param __str Source string.
447 */
448 basic_string(const basic_string& __str)
449 : _M_dataplus(_M_local_data(),
450 _Alloc_traits::_S_select_on_copy(__str._M_get_allocator()))
451 { _M_construct(__str._M_data(), __str._M_data() + __str.length()); }
452
453 // _GLIBCXX_RESOLVE_LIB_DEFECTS
454 // 2583. no way to supply an allocator for basic_string(str, pos)
455 /**
456 * @brief Construct string as copy of a substring.
457 * @param __str Source string.
458 * @param __pos Index of first character to copy from.
459 * @param __a Allocator to use.
460 */
461 basic_string(const basic_string& __str, size_type __pos,
462 const _Alloc& __a = _Alloc())
463 : _M_dataplus(_M_local_data(), __a)
464 {
465 const _CharT* __start = __str._M_data()
466 + __str._M_check(__pos, "basic_string::basic_string");
467 _M_construct(__start, __start + __str._M_limit(__pos, npos));
468 }
469
470 /**
471 * @brief Construct string as copy of a substring.
472 * @param __str Source string.
473 * @param __pos Index of first character to copy from.
474 * @param __n Number of characters to copy.
475 */
476 basic_string(const basic_string& __str, size_type __pos,
477 size_type __n)
478 : _M_dataplus(_M_local_data())
479 {
480 const _CharT* __start = __str._M_data()
481 + __str._M_check(__pos, "basic_string::basic_string");
482 _M_construct(__start, __start + __str._M_limit(__pos, __n));
483 }
484
485 /**
486 * @brief Construct string as copy of a substring.
487 * @param __str Source string.
488 * @param __pos Index of first character to copy from.
489 * @param __n Number of characters to copy.
490 * @param __a Allocator to use.
491 */
492 basic_string(const basic_string& __str, size_type __pos,
493 size_type __n, const _Alloc& __a)
494 : _M_dataplus(_M_local_data(), __a)
495 {
496 const _CharT* __start
497 = __str._M_data() + __str._M_check(__pos, "string::string");
498 _M_construct(__start, __start + __str._M_limit(__pos, __n));
499 }
500
501 /**
502 * @brief Construct string initialized by a character %array.
503 * @param __s Source character %array.
504 * @param __n Number of characters to copy.
505 * @param __a Allocator to use (default is default allocator).
506 *
507 * NB: @a __s must have at least @a __n characters, &apos;\\0&apos;
508 * has no special meaning.
509 */
510 basic_string(const _CharT* __s, size_type __n,
511 const _Alloc& __a = _Alloc())
512 : _M_dataplus(_M_local_data(), __a)
513 { _M_construct(__s, __s + __n); }
514
515 /**
516 * @brief Construct string as copy of a C string.
517 * @param __s Source C string.
518 * @param __a Allocator to use (default is default allocator).
519 */
520#if __cpp_deduction_guides201703L && ! defined _GLIBCXX_DEFINING_STRING_INSTANTIATIONS
521 // _GLIBCXX_RESOLVE_LIB_DEFECTS
522 // 3076. basic_string CTAD ambiguity
523 template<typename = _RequireAllocator<_Alloc>>
524#endif
525 basic_string(const _CharT* __s, const _Alloc& __a = _Alloc())
526 : _M_dataplus(_M_local_data(), __a)
527 { _M_construct(__s, __s ? __s + traits_type::length(__s) : __s+npos); }
528
529 /**
530 * @brief Construct string as multiple characters.
531 * @param __n Number of characters.
532 * @param __c Character to use.
533 * @param __a Allocator to use (default is default allocator).
534 */
535#if __cpp_deduction_guides201703L && ! defined _GLIBCXX_DEFINING_STRING_INSTANTIATIONS
536 // _GLIBCXX_RESOLVE_LIB_DEFECTS
537 // 3076. basic_string CTAD ambiguity
538 template<typename = _RequireAllocator<_Alloc>>
539#endif
540 basic_string(size_type __n, _CharT __c, const _Alloc& __a = _Alloc())
541 : _M_dataplus(_M_local_data(), __a)
542 { _M_construct(__n, __c); }
543
544#if __cplusplus201703L >= 201103L
545 /**
546 * @brief Move construct string.
547 * @param __str Source string.
548 *
549 * The newly-created string contains the exact contents of @a __str.
550 * @a __str is a valid, but unspecified string.
551 **/
552 basic_string(basic_string&& __str) noexcept
553 : _M_dataplus(_M_local_data(), std::move(__str._M_get_allocator()))
554 {
555 if (__str._M_is_local())
556 {
557 traits_type::copy(_M_local_buf, __str._M_local_buf,
558 _S_local_capacity + 1);
559 }
560 else
561 {
562 _M_data(__str._M_data());
563 _M_capacity(__str._M_allocated_capacity);
564 }
565
566 // Must use _M_length() here not _M_set_length() because
567 // basic_stringbuf relies on writing into unallocated capacity so
568 // we mess up the contents if we put a '\0' in the string.
569 _M_length(__str.length());
570 __str._M_data(__str._M_local_data());
571 __str._M_set_length(0);
572 }
573
574 /**
575 * @brief Construct string from an initializer %list.
576 * @param __l std::initializer_list of characters.
577 * @param __a Allocator to use (default is default allocator).
578 */
579 basic_string(initializer_list<_CharT> __l, const _Alloc& __a = _Alloc())
580 : _M_dataplus(_M_local_data(), __a)
581 { _M_construct(__l.begin(), __l.end()); }
582
583 basic_string(const basic_string& __str, const _Alloc& __a)
584 : _M_dataplus(_M_local_data(), __a)
585 { _M_construct(__str.begin(), __str.end()); }
586
587 basic_string(basic_string&& __str, const _Alloc& __a)
588 noexcept(_Alloc_traits::_S_always_equal())
589 : _M_dataplus(_M_local_data(), __a)
590 {
591 if (__str._M_is_local())
592 {
593 traits_type::copy(_M_local_buf, __str._M_local_buf,
594 _S_local_capacity + 1);
595 _M_length(__str.length());
596 __str._M_set_length(0);
597 }
598 else if (_Alloc_traits::_S_always_equal()
599 || __str.get_allocator() == __a)
600 {
601 _M_data(__str._M_data());
602 _M_length(__str.length());
603 _M_capacity(__str._M_allocated_capacity);
604 __str._M_data(__str._M_local_buf);
605 __str._M_set_length(0);
606 }
607 else
608 _M_construct(__str.begin(), __str.end());
609 }
610
611#endif // C++11
612
613 /**
614 * @brief Construct string as copy of a range.
615 * @param __beg Start of range.
616 * @param __end End of range.
617 * @param __a Allocator to use (default is default allocator).
618 */
619#if __cplusplus201703L >= 201103L
620 template<typename _InputIterator,
621 typename = std::_RequireInputIter<_InputIterator>>
622#else
623 template<typename _InputIterator>
624#endif
625 basic_string(_InputIterator __beg, _InputIterator __end,
626 const _Alloc& __a = _Alloc())
627 : _M_dataplus(_M_local_data(), __a)
628 { _M_construct(__beg, __end); }
629
630#if __cplusplus201703L >= 201703L
631 /**
632 * @brief Construct string from a substring of a string_view.
633 * @param __t Source object convertible to string view.
634 * @param __pos The index of the first character to copy from __t.
635 * @param __n The number of characters to copy from __t.
636 * @param __a Allocator to use.
637 */
638 template<typename _Tp, typename = _If_sv<_Tp, void>>
639 basic_string(const _Tp& __t, size_type __pos, size_type __n,
640 const _Alloc& __a = _Alloc())
641 : basic_string(_S_to_string_view(__t).substr(__pos, __n), __a) { }
642
643 /**
644 * @brief Construct string from a string_view.
645 * @param __t Source object convertible to string view.
646 * @param __a Allocator to use (default is default allocator).
647 */
648 template<typename _Tp, typename = _If_sv<_Tp, void>>
649 explicit
650 basic_string(const _Tp& __t, const _Alloc& __a = _Alloc())
651 : basic_string(__sv_wrapper(_S_to_string_view(__t)), __a) { }
652#endif // C++17
653
654 /**
655 * @brief Destroy the string instance.
656 */
657 ~basic_string()
658 { _M_dispose(); }
659
660 /**
661 * @brief Assign the value of @a str to this string.
662 * @param __str Source string.
663 */
664 basic_string&
665 operator=(const basic_string& __str)
666 {
667 return this->assign(__str);
668 }
669
670 /**
671 * @brief Copy contents of @a s into this string.
672 * @param __s Source null-terminated string.
673 */
674 basic_string&
675 operator=(const _CharT* __s)
676 { return this->assign(__s); }
677
678 /**
679 * @brief Set value to string of length 1.
680 * @param __c Source character.
681 *
682 * Assigning to a character makes this string length 1 and
683 * (*this)[0] == @a c.
684 */
685 basic_string&
686 operator=(_CharT __c)
687 {
688 this->assign(1, __c);
689 return *this;
690 }
691
692#if __cplusplus201703L >= 201103L
693 /**
694 * @brief Move assign the value of @a str to this string.
695 * @param __str Source string.
696 *
697 * The contents of @a str are moved into this string (without copying).
698 * @a str is a valid, but unspecified string.
699 **/
700 // _GLIBCXX_RESOLVE_LIB_DEFECTS
701 // 2063. Contradictory requirements for string move assignment
702 basic_string&
703 operator=(basic_string&& __str)
704 noexcept(_Alloc_traits::_S_nothrow_move())
705 {
706 if (!_M_is_local() && _Alloc_traits::_S_propagate_on_move_assign()
707 && !_Alloc_traits::_S_always_equal()
708 && _M_get_allocator() != __str._M_get_allocator())
709 {
710 // Destroy existing storage before replacing allocator.
711 _M_destroy(_M_allocated_capacity);
712 _M_data(_M_local_data());
713 _M_set_length(0);
714 }
715 // Replace allocator if POCMA is true.
716 std::__alloc_on_move(_M_get_allocator(), __str._M_get_allocator());
717
718 if (__str._M_is_local())
719 {
720 // We've always got room for a short string, just copy it.
721 if (__str.size())
722 this->_S_copy(_M_data(), __str._M_data(), __str.size());
723 _M_set_length(__str.size());
724 }
725 else if (_Alloc_traits::_S_propagate_on_move_assign()
726 || _Alloc_traits::_S_always_equal()
727 || _M_get_allocator() == __str._M_get_allocator())
728 {
729 // Just move the allocated pointer, our allocator can free it.
730 pointer __data = nullptr;
731 size_type __capacity;
732 if (!_M_is_local())
733 {
734 if (_Alloc_traits::_S_always_equal())
735 {
736 // __str can reuse our existing storage.
737 __data = _M_data();
738 __capacity = _M_allocated_capacity;
739 }
740 else // __str can't use it, so free it.
741 _M_destroy(_M_allocated_capacity);
742 }
743
744 _M_data(__str._M_data());
745 _M_length(__str.length());
746 _M_capacity(__str._M_allocated_capacity);
747 if (__data)
748 {
749 __str._M_data(__data);
750 __str._M_capacity(__capacity);
751 }
752 else
753 __str._M_data(__str._M_local_buf);
754 }
755 else // Need to do a deep copy
756 assign(__str);
757 __str.clear();
758 return *this;
759 }
760
761 /**
762 * @brief Set value to string constructed from initializer %list.
763 * @param __l std::initializer_list.
764 */
765 basic_string&
766 operator=(initializer_list<_CharT> __l)
767 {
768 this->assign(__l.begin(), __l.size());
769 return *this;
770 }
771#endif // C++11
772
773#if __cplusplus201703L >= 201703L
774 /**
775 * @brief Set value to string constructed from a string_view.
776 * @param __svt An object convertible to string_view.
777 */
778 template<typename _Tp>
779 _If_sv<_Tp, basic_string&>
780 operator=(const _Tp& __svt)
781 { return this->assign(__svt); }
782
783 /**
784 * @brief Convert to a string_view.
785 * @return A string_view.
786 */
787 operator __sv_type() const noexcept
788 { return __sv_type(data(), size()); }
789#endif // C++17
790
791 // Iterators:
792 /**
793 * Returns a read/write iterator that points to the first character in
794 * the %string.
795 */
796 iterator
797 begin() _GLIBCXX_NOEXCEPTnoexcept
798 { return iterator(_M_data()); }
799
800 /**
801 * Returns a read-only (constant) iterator that points to the first
802 * character in the %string.
803 */
804 const_iterator
805 begin() const _GLIBCXX_NOEXCEPTnoexcept
806 { return const_iterator(_M_data()); }
807
808 /**
809 * Returns a read/write iterator that points one past the last
810 * character in the %string.
811 */
812 iterator
813 end() _GLIBCXX_NOEXCEPTnoexcept
814 { return iterator(_M_data() + this->size()); }
815
816 /**
817 * Returns a read-only (constant) iterator that points one past the
818 * last character in the %string.
819 */
820 const_iterator
821 end() const _GLIBCXX_NOEXCEPTnoexcept
822 { return const_iterator(_M_data() + this->size()); }
823
824 /**
825 * Returns a read/write reverse iterator that points to the last
826 * character in the %string. Iteration is done in reverse element
827 * order.
828 */
829 reverse_iterator
830 rbegin() _GLIBCXX_NOEXCEPTnoexcept
831 { return reverse_iterator(this->end()); }
832
833 /**
834 * Returns a read-only (constant) reverse iterator that points
835 * to the last character in the %string. Iteration is done in
836 * reverse element order.
837 */
838 const_reverse_iterator
839 rbegin() const _GLIBCXX_NOEXCEPTnoexcept
840 { return const_reverse_iterator(this->end()); }
841
842 /**
843 * Returns a read/write reverse iterator that points to one before the
844 * first character in the %string. Iteration is done in reverse
845 * element order.
846 */
847 reverse_iterator
848 rend() _GLIBCXX_NOEXCEPTnoexcept
849 { return reverse_iterator(this->begin()); }
850
851 /**
852 * Returns a read-only (constant) reverse iterator that points
853 * to one before the first character in the %string. Iteration
854 * is done in reverse element order.
855 */
856 const_reverse_iterator
857 rend() const _GLIBCXX_NOEXCEPTnoexcept
858 { return const_reverse_iterator(this->begin()); }
859
860#if __cplusplus201703L >= 201103L
861 /**
862 * Returns a read-only (constant) iterator that points to the first
863 * character in the %string.
864 */
865 const_iterator
866 cbegin() const noexcept
867 { return const_iterator(this->_M_data()); }
868
869 /**
870 * Returns a read-only (constant) iterator that points one past the
871 * last character in the %string.
872 */
873 const_iterator
874 cend() const noexcept
875 { return const_iterator(this->_M_data() + this->size()); }
876
877 /**
878 * Returns a read-only (constant) reverse iterator that points
879 * to the last character in the %string. Iteration is done in
880 * reverse element order.
881 */
882 const_reverse_iterator
883 crbegin() const noexcept
884 { return const_reverse_iterator(this->end()); }
885
886 /**
887 * Returns a read-only (constant) reverse iterator that points
888 * to one before the first character in the %string. Iteration
889 * is done in reverse element order.
890 */
891 const_reverse_iterator
892 crend() const noexcept
893 { return const_reverse_iterator(this->begin()); }
894#endif
895
896 public:
897 // Capacity:
898 /// Returns the number of characters in the string, not including any
899 /// null-termination.
900 size_type
901 size() const _GLIBCXX_NOEXCEPTnoexcept
902 { return _M_string_length; }
903
904 /// Returns the number of characters in the string, not including any
905 /// null-termination.
906 size_type
907 length() const _GLIBCXX_NOEXCEPTnoexcept
908 { return _M_string_length; }
909
910 /// Returns the size() of the largest possible %string.
911 size_type
912 max_size() const _GLIBCXX_NOEXCEPTnoexcept
913 { return (_Alloc_traits::max_size(_M_get_allocator()) - 1) / 2; }
914
915 /**
916 * @brief Resizes the %string to the specified number of characters.
917 * @param __n Number of characters the %string should contain.
918 * @param __c Character to fill any new elements.
919 *
920 * This function will %resize the %string to the specified
921 * number of characters. If the number is smaller than the
922 * %string's current size the %string is truncated, otherwise
923 * the %string is extended and new elements are %set to @a __c.
924 */
925 void
926 resize(size_type __n, _CharT __c);
927
928 /**
929 * @brief Resizes the %string to the specified number of characters.
930 * @param __n Number of characters the %string should contain.
931 *
932 * This function will resize the %string to the specified length. If
933 * the new size is smaller than the %string's current size the %string
934 * is truncated, otherwise the %string is extended and new characters
935 * are default-constructed. For basic types such as char, this means
936 * setting them to 0.
937 */
938 void
939 resize(size_type __n)
940 { this->resize(__n, _CharT()); }
941
942#if __cplusplus201703L >= 201103L
943 /// A non-binding request to reduce capacity() to size().
944 void
945 shrink_to_fit() noexcept
946 {
947#if __cpp_exceptions
948 if (capacity() > size())
949 {
950 try
951 { reserve(0); }
952 catch(...)
953 { }
954 }
955#endif
956 }
957#endif
958
959 /**
960 * Returns the total number of characters that the %string can hold
961 * before needing to allocate more memory.
962 */
963 size_type
964 capacity() const _GLIBCXX_NOEXCEPTnoexcept
965 {
966 return _M_is_local() ? size_type(_S_local_capacity)
967 : _M_allocated_capacity;
968 }
969
970 /**
971 * @brief Attempt to preallocate enough memory for specified number of
972 * characters.
973 * @param __res_arg Number of characters required.
974 * @throw std::length_error If @a __res_arg exceeds @c max_size().
975 *
976 * This function attempts to reserve enough memory for the
977 * %string to hold the specified number of characters. If the
978 * number requested is more than max_size(), length_error is
979 * thrown.
980 *
981 * The advantage of this function is that if optimal code is a
982 * necessity and the user can determine the string length that will be
983 * required, the user can reserve the memory in %advance, and thus
984 * prevent a possible reallocation of memory and copying of %string
985 * data.
986 */
987 void
988 reserve(size_type __res_arg = 0);
989
990 /**
991 * Erases the string, making it empty.
992 */
993 void
994 clear() _GLIBCXX_NOEXCEPTnoexcept
995 { _M_set_length(0); }
996
997 /**
998 * Returns true if the %string is empty. Equivalent to
999 * <code>*this == ""</code>.
1000 */
1001 _GLIBCXX_NODISCARD[[__nodiscard__]] bool
1002 empty() const _GLIBCXX_NOEXCEPTnoexcept
1003 { return this->size() == 0; }
1004
1005 // Element access:
1006 /**
1007 * @brief Subscript access to the data contained in the %string.
1008 * @param __pos The index of the character to access.
1009 * @return Read-only (constant) reference to the character.
1010 *
1011 * This operator allows for easy, array-style, data access.
1012 * Note that data access with this operator is unchecked and
1013 * out_of_range lookups are not defined. (For checked lookups
1014 * see at().)
1015 */
1016 const_reference
1017 operator[] (size_type __pos) const _GLIBCXX_NOEXCEPTnoexcept
1018 {
1019 __glibcxx_assert(__pos <= size())do { if (! (__pos <= size())) std::__replacement_assert("/usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/bits/basic_string.h"
, 1019, __PRETTY_FUNCTION__, "__pos <= size()"); } while (
false)
;
1020 return _M_data()[__pos];
1021 }
1022
1023 /**
1024 * @brief Subscript access to the data contained in the %string.
1025 * @param __pos The index of the character to access.
1026 * @return Read/write reference to the character.
1027 *
1028 * This operator allows for easy, array-style, data access.
1029 * Note that data access with this operator is unchecked and
1030 * out_of_range lookups are not defined. (For checked lookups
1031 * see at().)
1032 */
1033 reference
1034 operator[](size_type __pos)
1035 {
1036 // Allow pos == size() both in C++98 mode, as v3 extension,
1037 // and in C++11 mode.
1038 __glibcxx_assert(__pos <= size())do { if (! (__pos <= size())) std::__replacement_assert("/usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/bits/basic_string.h"
, 1038, __PRETTY_FUNCTION__, "__pos <= size()"); } while (
false)
;
1039 // In pedantic mode be strict in C++98 mode.
1040 _GLIBCXX_DEBUG_PEDASSERT(__cplusplus >= 201103L || __pos < size());
1041 return _M_data()[__pos];
1042 }
1043
1044 /**
1045 * @brief Provides access to the data contained in the %string.
1046 * @param __n The index of the character to access.
1047 * @return Read-only (const) reference to the character.
1048 * @throw std::out_of_range If @a n is an invalid index.
1049 *
1050 * This function provides for safer data access. The parameter is
1051 * first checked that it is in the range of the string. The function
1052 * throws out_of_range if the check fails.
1053 */
1054 const_reference
1055 at(size_type __n) const
1056 {
1057 if (__n >= this->size())
1058 __throw_out_of_range_fmt(__N("basic_string::at: __n "("basic_string::at: __n " "(which is %zu) >= this->size() "
"(which is %zu)")
1059 "(which is %zu) >= this->size() "("basic_string::at: __n " "(which is %zu) >= this->size() "
"(which is %zu)")
1060 "(which is %zu)")("basic_string::at: __n " "(which is %zu) >= this->size() "
"(which is %zu)")
,
1061 __n, this->size());
1062 return _M_data()[__n];
1063 }
1064
1065 /**
1066 * @brief Provides access to the data contained in the %string.
1067 * @param __n The index of the character to access.
1068 * @return Read/write reference to the character.
1069 * @throw std::out_of_range If @a n is an invalid index.
1070 *
1071 * This function provides for safer data access. The parameter is
1072 * first checked that it is in the range of the string. The function
1073 * throws out_of_range if the check fails.
1074 */
1075 reference
1076 at(size_type __n)
1077 {
1078 if (__n >= size())
1079 __throw_out_of_range_fmt(__N("basic_string::at: __n "("basic_string::at: __n " "(which is %zu) >= this->size() "
"(which is %zu)")
1080 "(which is %zu) >= this->size() "("basic_string::at: __n " "(which is %zu) >= this->size() "
"(which is %zu)")
1081 "(which is %zu)")("basic_string::at: __n " "(which is %zu) >= this->size() "
"(which is %zu)")
,
1082 __n, this->size());
1083 return _M_data()[__n];
1084 }
1085
1086#if __cplusplus201703L >= 201103L
1087 /**
1088 * Returns a read/write reference to the data at the first
1089 * element of the %string.
1090 */
1091 reference
1092 front() noexcept
1093 {
1094 __glibcxx_assert(!empty())do { if (! (!empty())) std::__replacement_assert("/usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/bits/basic_string.h"
, 1094, __PRETTY_FUNCTION__, "!empty()"); } while (false)
;
1095 return operator[](0);
1096 }
1097
1098 /**
1099 * Returns a read-only (constant) reference to the data at the first
1100 * element of the %string.
1101 */
1102 const_reference
1103 front() const noexcept
1104 {
1105 __glibcxx_assert(!empty())do { if (! (!empty())) std::__replacement_assert("/usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/bits/basic_string.h"
, 1105, __PRETTY_FUNCTION__, "!empty()"); } while (false)
;
1106 return operator[](0);
1107 }
1108
1109 /**
1110 * Returns a read/write reference to the data at the last
1111 * element of the %string.
1112 */
1113 reference
1114 back() noexcept
1115 {
1116 __glibcxx_assert(!empty())do { if (! (!empty())) std::__replacement_assert("/usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/bits/basic_string.h"
, 1116, __PRETTY_FUNCTION__, "!empty()"); } while (false)
;
1117 return operator[](this->size() - 1);
1118 }
1119
1120 /**
1121 * Returns a read-only (constant) reference to the data at the
1122 * last element of the %string.
1123 */
1124 const_reference
1125 back() const noexcept
1126 {
1127 __glibcxx_assert(!empty())do { if (! (!empty())) std::__replacement_assert("/usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/bits/basic_string.h"
, 1127, __PRETTY_FUNCTION__, "!empty()"); } while (false)
;
1128 return operator[](this->size() - 1);
1129 }
1130#endif
1131
1132 // Modifiers:
1133 /**
1134 * @brief Append a string to this string.
1135 * @param __str The string to append.
1136 * @return Reference to this string.
1137 */
1138 basic_string&
1139 operator+=(const basic_string& __str)
1140 { return this->append(__str); }
1141
1142 /**
1143 * @brief Append a C string.
1144 * @param __s The C string to append.
1145 * @return Reference to this string.
1146 */
1147 basic_string&
1148 operator+=(const _CharT* __s)
1149 { return this->append(__s); }
1150
1151 /**
1152 * @brief Append a character.
1153 * @param __c The character to append.
1154 * @return Reference to this string.
1155 */
1156 basic_string&
1157 operator+=(_CharT __c)
1158 {
1159 this->push_back(__c);
1160 return *this;
1161 }
1162
1163#if __cplusplus201703L >= 201103L
1164 /**
1165 * @brief Append an initializer_list of characters.
1166 * @param __l The initializer_list of characters to be appended.
1167 * @return Reference to this string.
1168 */
1169 basic_string&
1170 operator+=(initializer_list<_CharT> __l)
1171 { return this->append(__l.begin(), __l.size()); }
1172#endif // C++11
1173
1174#if __cplusplus201703L >= 201703L
1175 /**
1176 * @brief Append a string_view.
1177 * @param __svt An object convertible to string_view to be appended.
1178 * @return Reference to this string.
1179 */
1180 template<typename _Tp>
1181 _If_sv<_Tp, basic_string&>
1182 operator+=(const _Tp& __svt)
1183 { return this->append(__svt); }
1184#endif // C++17
1185
1186 /**
1187 * @brief Append a string to this string.
1188 * @param __str The string to append.
1189 * @return Reference to this string.
1190 */
1191 basic_string&
1192 append(const basic_string& __str)
1193 { return _M_append(__str._M_data(), __str.size()); }
1194
1195 /**
1196 * @brief Append a substring.
1197 * @param __str The string to append.
1198 * @param __pos Index of the first character of str to append.
1199 * @param __n The number of characters to append.
1200 * @return Reference to this string.
1201 * @throw std::out_of_range if @a __pos is not a valid index.
1202 *
1203 * This function appends @a __n characters from @a __str
1204 * starting at @a __pos to this string. If @a __n is is larger
1205 * than the number of available characters in @a __str, the
1206 * remainder of @a __str is appended.
1207 */
1208 basic_string&
1209 append(const basic_string& __str, size_type __pos, size_type __n = npos)
1210 { return _M_append(__str._M_data()
1211 + __str._M_check(__pos, "basic_string::append"),
1212 __str._M_limit(__pos, __n)); }
1213
1214 /**
1215 * @brief Append a C substring.
1216 * @param __s The C string to append.
1217 * @param __n The number of characters to append.
1218 * @return Reference to this string.
1219 */
1220 basic_string&
1221 append(const _CharT* __s, size_type __n)
1222 {
1223 __glibcxx_requires_string_len(__s, __n);
1224 _M_check_length(size_type(0), __n, "basic_string::append");
1225 return _M_append(__s, __n);
1226 }
1227
1228 /**
1229 * @brief Append a C string.
1230 * @param __s The C string to append.
1231 * @return Reference to this string.
1232 */
1233 basic_string&
1234 append(const _CharT* __s)
1235 {
1236 __glibcxx_requires_string(__s);
1237 const size_type __n = traits_type::length(__s);
1238 _M_check_length(size_type(0), __n, "basic_string::append");
1239 return _M_append(__s, __n);
1240 }
1241
1242 /**
1243 * @brief Append multiple characters.
1244 * @param __n The number of characters to append.
1245 * @param __c The character to use.
1246 * @return Reference to this string.
1247 *
1248 * Appends __n copies of __c to this string.
1249 */
1250 basic_string&
1251 append(size_type __n, _CharT __c)
1252 { return _M_replace_aux(this->size(), size_type(0), __n, __c); }
1253
1254#if __cplusplus201703L >= 201103L
1255 /**
1256 * @brief Append an initializer_list of characters.
1257 * @param __l The initializer_list of characters to append.
1258 * @return Reference to this string.
1259 */
1260 basic_string&
1261 append(initializer_list<_CharT> __l)
1262 { return this->append(__l.begin(), __l.size()); }
1263#endif // C++11
1264
1265 /**
1266 * @brief Append a range of characters.
1267 * @param __first Iterator referencing the first character to append.
1268 * @param __last Iterator marking the end of the range.
1269 * @return Reference to this string.
1270 *
1271 * Appends characters in the range [__first,__last) to this string.
1272 */
1273#if __cplusplus201703L >= 201103L
1274 template<class _InputIterator,
1275 typename = std::_RequireInputIter<_InputIterator>>
1276#else
1277 template<class _InputIterator>
1278#endif
1279 basic_string&
1280 append(_InputIterator __first, _InputIterator __last)
1281 { return this->replace(end(), end(), __first, __last); }
1282
1283#if __cplusplus201703L >= 201703L
1284 /**
1285 * @brief Append a string_view.
1286 * @param __svt An object convertible to string_view to be appended.
1287 * @return Reference to this string.
1288 */
1289 template<typename _Tp>
1290 _If_sv<_Tp, basic_string&>
1291 append(const _Tp& __svt)
1292 {
1293 __sv_type __sv = __svt;
1294 return this->append(__sv.data(), __sv.size());
1295 }
1296
1297 /**
1298 * @brief Append a range of characters from a string_view.
1299 * @param __svt An object convertible to string_view to be appended from.
1300 * @param __pos The position in the string_view to append from.
1301 * @param __n The number of characters to append from the string_view.
1302 * @return Reference to this string.
1303 */
1304 template<typename _Tp>
1305 _If_sv<_Tp, basic_string&>
1306 append(const _Tp& __svt, size_type __pos, size_type __n = npos)
1307 {
1308 __sv_type __sv = __svt;
1309 return _M_append(__sv.data()
1310 + std::__sv_check(__sv.size(), __pos, "basic_string::append"),
1311 std::__sv_limit(__sv.size(), __pos, __n));
1312 }
1313#endif // C++17
1314
1315 /**
1316 * @brief Append a single character.
1317 * @param __c Character to append.
1318 */
1319 void
1320 push_back(_CharT __c)
1321 {
1322 const size_type __size = this->size();
1323 if (__size + 1 > this->capacity())
1324 this->_M_mutate(__size, size_type(0), 0, size_type(1));
1325 traits_type::assign(this->_M_data()[__size], __c);
1326 this->_M_set_length(__size + 1);
1327 }
1328
1329 /**
1330 * @brief Set value to contents of another string.
1331 * @param __str Source string to use.
1332 * @return Reference to this string.
1333 */
1334 basic_string&
1335 assign(const basic_string& __str)
1336 {
1337#if __cplusplus201703L >= 201103L
1338 if (_Alloc_traits::_S_propagate_on_copy_assign())
1339 {
1340 if (!_Alloc_traits::_S_always_equal() && !_M_is_local()
1341 && _M_get_allocator() != __str._M_get_allocator())
1342 {
1343 // Propagating allocator cannot free existing storage so must
1344 // deallocate it before replacing current allocator.
1345 if (__str.size() <= _S_local_capacity)
1346 {
1347 _M_destroy(_M_allocated_capacity);
1348 _M_data(_M_local_data());
1349 _M_set_length(0);
1350 }
1351 else
1352 {
1353 const auto __len = __str.size();
1354 auto __alloc = __str._M_get_allocator();
1355 // If this allocation throws there are no effects:
1356 auto __ptr = _Alloc_traits::allocate(__alloc, __len + 1);
1357 _M_destroy(_M_allocated_capacity);
1358 _M_data(__ptr);
1359 _M_capacity(__len);
1360 _M_set_length(__len);
1361 }
1362 }
1363 std::__alloc_on_copy(_M_get_allocator(), __str._M_get_allocator());
1364 }
1365#endif
1366 this->_M_assign(__str);
1367 return *this;
1368 }
1369
1370#if __cplusplus201703L >= 201103L
1371 /**
1372 * @brief Set value to contents of another string.
1373 * @param __str Source string to use.
1374 * @return Reference to this string.
1375 *
1376 * This function sets this string to the exact contents of @a __str.
1377 * @a __str is a valid, but unspecified string.
1378 */
1379 basic_string&
1380 assign(basic_string&& __str)
1381 noexcept(_Alloc_traits::_S_nothrow_move())
1382 {
1383 // _GLIBCXX_RESOLVE_LIB_DEFECTS
1384 // 2063. Contradictory requirements for string move assignment
1385 return *this = std::move(__str);
1386 }
1387#endif // C++11
1388
1389 /**
1390 * @brief Set value to a substring of a string.
1391 * @param __str The string to use.
1392 * @param __pos Index of the first character of str.
1393 * @param __n Number of characters to use.
1394 * @return Reference to this string.
1395 * @throw std::out_of_range if @a pos is not a valid index.
1396 *
1397 * This function sets this string to the substring of @a __str
1398 * consisting of @a __n characters at @a __pos. If @a __n is
1399 * is larger than the number of available characters in @a
1400 * __str, the remainder of @a __str is used.
1401 */
1402 basic_string&
1403 assign(const basic_string& __str, size_type __pos, size_type __n = npos)
1404 { return _M_replace(size_type(0), this->size(), __str._M_data()
1405 + __str._M_check(__pos, "basic_string::assign"),
1406 __str._M_limit(__pos, __n)); }
1407
1408